This is an alternative patch on a path to D137530.
The basic problem being tackled here is that we need to place a scalar into lane 0 of a vector register before our reduction instructions. Since we only care about lane 0 of the vector, we can use any VL >= 1, provided the total amount of work performed matches the work that would be performed for VL=1.
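To make the effect concrete, here is the shape of the change taken from one of the fadd test updates below: the scalar insert no longer needs its own VL=1 state, so both vsetvli toggles around it disappear.

  Before:
    vsetivli zero, 1, e16, m1, ta, ma
    vfmv.s.f v12, fa0
    vsetvli zero, a1, e16, m4, ta, ma
    vfredusum.vs v8, v8, v12

  After:
    vfmv.s.f v12, fa0
    vfredusum.vs v8, v8, v12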
This change does not contain the logic from D137530 to perform the insert at the original VT and then extract down to LMUL1. That turns out to be a good choice: discussion on this review has indicated there are issues around LMUL2 and above with our representation of vmv.s.x. We'd also need to be careful with the splat logic for the same reasons.
The only potentially concerning codegen change I spot here is that we stop using a broadcast load (for VL=1) and instead do a scalar load and insert. I think this is probably reasonable; if reviewers disagree, I can investigate using a broadcast load which writes to the undef lanes. If we want to do that, we should do it for INSERT_VECTOR_ELT as well, so that'll end up as its own patch series.
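For reference, the broadcast-load change looks like this in the fmin/fmax tests below, where the neutral element is materialized from the constant pool (the new RV32/RV64 check prefixes appear to differ only in how the scalar load is scheduled relative to the vsetivli):

  Before:
    lui a0, %hi(.LCPI68_0)
    addi a0, a0, %lo(.LCPI68_0)
    vsetivli zero, 1, e16, mf4, ta, ma
    vlse16.v v9, (a0), zero

  After:
    lui a1, %hi(.LCPI68_0)
    flh ft0, %lo(.LCPI68_0)(a1)
    vfmv.s.f v9, ft0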
Differential Revision: https://reviews.llvm.org/D139656
const MVT VecVT = Vec.getSimpleValueType();
const MVT M1VT = getLMUL1VT(VecVT);
const MVT XLenVT = Subtarget.getXLenVT();
+ const bool NonZeroAVL = hasNonZeroAVL(VL);
// The reduction needs an LMUL1 input; do the splat at either LMUL1
// or the original VT if fractional.
auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
- SDValue InitialValue =
- lowerScalarInsert(StartValue, DAG.getConstant(1, DL, XLenVT),
- InnerVT, DL, DAG, Subtarget);
+ // We reuse the VL of the reduction to reduce vsetvli toggles if we can
+ // prove it is non-zero. For the AVL=0 case, we need the scalar to
+ // be the result of the reduction operation.
+ auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
+ SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
+ DAG, Subtarget);
if (M1VT != InnerVT)
InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT,
DAG.getUNDEF(M1VT),
InitialValue, DAG.getConstant(0, DL, XLenVT));
- SDValue PassThru = hasNonZeroAVL(VL) ? DAG.getUNDEF(M1VT) : InitialValue;
+ SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, PassThru, Vec,
InitialValue, Mask, VL);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
                   DAG.getConstant(0, DL, XLenVT));
ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
return SDValue();
- if (!isOneConstant(ScalarV.getOperand(2)))
+ if (!hasNonZeroAVL(ScalarV.getOperand(2)))
return SDValue();
// Check the scalar of ScalarV is neutral element
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s
declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>)
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vfadd.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vfredusum.vs v8, v8, v16
+; CHECK-NEXT: vfredusum.vs v8, v8, v24
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <128 x half>, <128 x half>* %x
define half @vreduce_ord_fadd_v128f16(<128 x half>* %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v128f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: li a2, 64
-; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: li a1, 64
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v24, fa0
-; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-NEXT: vfredosum.vs v16, v16, v24
-; CHECK-NEXT: vfmv.f.s ft0, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v16, ft0
-; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-NEXT: vfredosum.vs v8, v8, v16
+; CHECK-NEXT: vfredosum.vs v8, v8, v24
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: vfmv.s.f v8, ft0
+; CHECK-NEXT: vfredosum.vs v8, v16, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <128 x half>, <128 x half>* %x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v10
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v10
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v12
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v12
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vfadd.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vfredusum.vs v8, v8, v16
+; CHECK-NEXT: vfredusum.vs v8, v8, v24
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <64 x float>, <64 x float>* %x
define float @vreduce_ord_fadd_v64f32(<64 x float>* %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v64f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: li a2, 32
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: vle32.v v8, (a1)
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v24, fa0
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: vfredosum.vs v16, v16, v24
-; CHECK-NEXT: vfmv.f.s ft0, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v16, ft0
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: vfredosum.vs v8, v8, v16
+; CHECK-NEXT: vfredosum.vs v8, v8, v24
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: vfmv.s.f v8, ft0
+; CHECK-NEXT: vfredosum.vs v8, v16, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <64 x float>, <64 x float>* %x
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwadd.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v8, fa0
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vfredusum.vs v8, v24, v8
+; CHECK-NEXT: vfwadd.vv v0, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfredusum.vs v8, v0, v24
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <64 x half>, <64 x half>* %x
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v16, a0
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v16, v16, v24
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s ft0, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v16
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v10
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v10
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v12
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v12
; CHECK-NEXT: vslidedown.vi v16, v8, 16
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vfwadd.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v24, v8
; CHECK-NEXT: vle32.v v16, (a0)
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v16, 16
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v16, v16, v24
; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s ft0, v16
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v16
declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
define half @vreduce_fmin_v2f16(<2 x half>* %x) {
-; CHECK-LABEL: vreduce_fmin_v2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI68_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI68_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v2f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI68_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI68_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v2f16:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI68_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI68_0)(a1)
+; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <2 x half>, <2 x half>* %x
%red = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> %v)
ret half %red
declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
define half @vreduce_fmin_v4f16(<4 x half>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI69_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI69_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI69_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI69_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f16:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI69_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI69_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
%red = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v)
ret half %red
}
define half @vreduce_fmin_v4f16_nonans(<4 x half>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f16_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI70_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI70_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f16_nonans:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI70_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI70_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f16_nonans:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI70_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI70_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
%red = call nnan half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v)
ret half %red
}
define half @vreduce_fmin_v4f16_nonans_noinfs(<4 x half>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f16_nonans_noinfs:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI71_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI71_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f16_nonans_noinfs:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI71_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI71_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f16_nonans_noinfs:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI71_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI71_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
%red = call nnan ninf half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v)
ret half %red
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
+; CHECK-NEXT: lui a1, %hi(.LCPI72_0)
+; CHECK-NEXT: flh ft0, %lo(.LCPI72_0)(a1)
; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vfmv.s.f v24, ft0
; CHECK-NEXT: vfmin.vv v8, v8, v16
-; CHECK-NEXT: lui a0, %hi(.LCPI72_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI72_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v16, (a0), zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v16
+; CHECK-NEXT: vfredmin.vs v8, v8, v24
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <128 x half>, <128 x half>* %x
declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
define float @vreduce_fmin_v2f32(<2 x float>* %x) {
-; CHECK-LABEL: vreduce_fmin_v2f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI73_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI73_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v2f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI73_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI73_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v2f32:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI73_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI73_0)(a1)
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <2 x float>, <2 x float>* %x
%red = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %v)
ret float %red
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
define float @vreduce_fmin_v4f32(<4 x float>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI74_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI74_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI74_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI74_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f32:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI74_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI74_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x float>, <4 x float>* %x
%red = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
ret float %red
}
define float @vreduce_fmin_v4f32_nonans(<4 x float>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f32_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI75_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI75_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f32_nonans:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI75_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI75_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f32_nonans:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI75_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI75_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x float>, <4 x float>* %x
%red = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
ret float %red
}
define float @vreduce_fmin_v4f32_nonans_noinfs(<4 x float>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f32_nonans_noinfs:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI76_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI76_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f32_nonans_noinfs:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI76_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI76_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f32_nonans_noinfs:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI76_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI76_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x float>, <4 x float>* %x
%red = call nnan ninf float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
ret float %red
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: addi a2, a0, 384
-; CHECK-NEXT: vle32.v v16, (a2)
-; CHECK-NEXT: addi a2, a0, 256
+; CHECK-NEXT: addi a1, a0, 384
+; CHECK-NEXT: vle32.v v16, (a1)
+; CHECK-NEXT: addi a1, a0, 256
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v24, (a0)
-; CHECK-NEXT: vle32.v v0, (a2)
+; CHECK-NEXT: vle32.v v0, (a1)
+; CHECK-NEXT: lui a0, %hi(.LCPI77_0)
+; CHECK-NEXT: flw ft0, %lo(.LCPI77_0)(a0)
; CHECK-NEXT: vfmin.vv v16, v24, v16
; CHECK-NEXT: vfmin.vv v8, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v16
-; CHECK-NEXT: lui a0, %hi(.LCPI77_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI77_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v16, (a0), zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
define double @vreduce_fmin_v2f64(<2 x double>* %x) {
-; CHECK-LABEL: vreduce_fmin_v2f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI78_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI78_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v2f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI78_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI78_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v2f64:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI78_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI78_0)(a1)
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <2 x double>, <2 x double>* %x
%red = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %v)
ret double %red
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
define double @vreduce_fmin_v4f64(<4 x double>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI79_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI79_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v10
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI79_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI79_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v10, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v10
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f64:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI79_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI79_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v10, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v10
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
%red = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
ret double %red
}
define double @vreduce_fmin_v4f64_nonans(<4 x double>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f64_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI80_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI80_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v10
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f64_nonans:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI80_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI80_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v10, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v10
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f64_nonans:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI80_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI80_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v10, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v10
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
%red = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
ret double %red
}
define double @vreduce_fmin_v4f64_nonans_noinfs(<4 x double>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f64_nonans_noinfs:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI81_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI81_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v10
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f64_nonans_noinfs:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI81_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI81_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v10, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v10
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f64_nonans_noinfs:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI81_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI81_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v10, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v10
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
%red = call nnan ninf double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
ret double %red
declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>)
define double @vreduce_fmin_v32f64(<32 x double>* %x) {
-; CHECK-LABEL: vreduce_fmin_v32f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: vle64.v v16, (a0)
-; CHECK-NEXT: vfmin.vv v8, v8, v16
-; CHECK-NEXT: lui a0, %hi(.LCPI82_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI82_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v16, (a0), zero
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v16
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v32f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: lui a1, %hi(.LCPI82_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI82_0)(a1)
+; RV32-NEXT: vle64.v v16, (a0)
+; RV32-NEXT: vfmv.s.f v24, ft0
+; RV32-NEXT: vfmin.vv v8, v8, v16
+; RV32-NEXT: vfredmin.vs v8, v8, v24
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v32f64:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI82_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI82_0)(a1)
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vle64.v v16, (a0)
+; RV64-NEXT: vfmv.s.f v24, ft0
+; RV64-NEXT: vfmin.vv v8, v8, v16
+; RV64-NEXT: vfredmin.vs v8, v8, v24
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <32 x double>, <32 x double>* %x
%red = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %v)
ret double %red
declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
define half @vreduce_fmax_v2f16(<2 x half>* %x) {
-; CHECK-LABEL: vreduce_fmax_v2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI83_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI83_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v2f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI83_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI83_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v2f16:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI83_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI83_0)(a1)
+; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <2 x half>, <2 x half>* %x
%red = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> %v)
ret half %red
declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
define half @vreduce_fmax_v4f16(<4 x half>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI84_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI84_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI84_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI84_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f16:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI84_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI84_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
%red = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v)
ret half %red
}
define half @vreduce_fmax_v4f16_nonans(<4 x half>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f16_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI85_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI85_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f16_nonans:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI85_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI85_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f16_nonans:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI85_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI85_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
%red = call nnan half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v)
ret half %red
}
define half @vreduce_fmax_v4f16_nonans_noinfs(<4 x half>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f16_nonans_noinfs:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI86_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI86_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f16_nonans_noinfs:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI86_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI86_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f16_nonans_noinfs:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI86_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI86_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
%red = call nnan ninf half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v)
ret half %red
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
+; CHECK-NEXT: lui a1, %hi(.LCPI87_0)
+; CHECK-NEXT: flh ft0, %lo(.LCPI87_0)(a1)
; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vfmv.s.f v24, ft0
; CHECK-NEXT: vfmax.vv v8, v8, v16
-; CHECK-NEXT: lui a0, %hi(.LCPI87_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI87_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v16, (a0), zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v16
+; CHECK-NEXT: vfredmax.vs v8, v8, v24
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <128 x half>, <128 x half>* %x
declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
define float @vreduce_fmax_v2f32(<2 x float>* %x) {
-; CHECK-LABEL: vreduce_fmax_v2f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI88_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI88_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v2f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI88_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI88_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v2f32:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI88_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI88_0)(a1)
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <2 x float>, <2 x float>* %x
%red = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %v)
ret float %red
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
define float @vreduce_fmax_v4f32(<4 x float>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI89_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI89_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI89_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI89_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f32:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI89_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI89_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x float>, <4 x float>* %x
%red = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
ret float %red
}
define float @vreduce_fmax_v4f32_nonans(<4 x float>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f32_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI90_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI90_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f32_nonans:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI90_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI90_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f32_nonans:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI90_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI90_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x float>, <4 x float>* %x
%red = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
ret float %red
}
define float @vreduce_fmax_v4f32_nonans_noinfs(<4 x float>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f32_nonans_noinfs:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI91_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI91_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f32_nonans_noinfs:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI91_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI91_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f32_nonans_noinfs:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI91_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI91_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x float>, <4 x float>* %x
%red = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
ret float %red
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: addi a2, a0, 384
-; CHECK-NEXT: vle32.v v16, (a2)
-; CHECK-NEXT: addi a2, a0, 256
+; CHECK-NEXT: addi a1, a0, 384
+; CHECK-NEXT: vle32.v v16, (a1)
+; CHECK-NEXT: addi a1, a0, 256
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v24, (a0)
-; CHECK-NEXT: vle32.v v0, (a2)
+; CHECK-NEXT: vle32.v v0, (a1)
+; CHECK-NEXT: lui a0, %hi(.LCPI92_0)
+; CHECK-NEXT: flw ft0, %lo(.LCPI92_0)(a0)
; CHECK-NEXT: vfmax.vv v16, v24, v16
; CHECK-NEXT: vfmax.vv v8, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v16
-; CHECK-NEXT: lui a0, %hi(.LCPI92_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI92_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v16, (a0), zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
define double @vreduce_fmax_v2f64(<2 x double>* %x) {
-; CHECK-LABEL: vreduce_fmax_v2f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI93_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI93_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v2f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI93_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI93_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v2f64:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI93_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI93_0)(a1)
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <2 x double>, <2 x double>* %x
%red = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %v)
ret double %red
declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
define double @vreduce_fmax_v4f64(<4 x double>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI94_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI94_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v10
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI94_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI94_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v10, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v10
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f64:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI94_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI94_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v10, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v10
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
%red = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v)
ret double %red
}
define double @vreduce_fmax_v4f64_nonans(<4 x double>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f64_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI95_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI95_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v10
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f64_nonans:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI95_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI95_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v10, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v10
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f64_nonans:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI95_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI95_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v10, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v10
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
%red = call nnan double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v)
ret double %red
}
define double @vreduce_fmax_v4f64_nonans_noinfs(<4 x double>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f64_nonans_noinfs:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI96_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI96_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v10
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f64_nonans_noinfs:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI96_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI96_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v10, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v10
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f64_nonans_noinfs:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI96_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI96_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v10, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v10
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
%red = call nnan ninf double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v)
ret double %red
declare double @llvm.vector.reduce.fmax.v32f64(<32 x double>)
define double @vreduce_fmax_v32f64(<32 x double>* %x) {
-; CHECK-LABEL: vreduce_fmax_v32f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: vle64.v v16, (a0)
-; CHECK-NEXT: vfmax.vv v8, v8, v16
-; CHECK-NEXT: lui a0, %hi(.LCPI97_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI97_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v16, (a0), zero
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v16
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v32f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: lui a1, %hi(.LCPI97_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI97_0)(a1)
+; RV32-NEXT: vle64.v v16, (a0)
+; RV32-NEXT: vfmv.s.f v24, ft0
+; RV32-NEXT: vfmax.vv v8, v8, v16
+; RV32-NEXT: vfredmax.vs v8, v8, v24
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v32f64:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI97_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI97_0)(a1)
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vle64.v v16, (a0)
+; RV64-NEXT: vfmv.s.f v24, ft0
+; RV64-NEXT: vfmax.vv v8, v8, v16
+; RV64-NEXT: vfredmax.vs v8, v8, v24
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <32 x double>, <32 x double>* %x
%red = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %v)
ret double %red
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vadd.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v8, v16
+; CHECK-NEXT: vredsum.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <256 x i8>, <256 x i8>* %x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v10
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v10
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v12
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v12
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vadd.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v8, v16
+; CHECK-NEXT: vredsum.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <128 x i16>, <128 x i16>* %x
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwadd.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v24, v8
+; CHECK-NEXT: vwadd.vv v0, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT: vredsum.vs v8, v0, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <128 x i8>, <128 x i8>* %x
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwaddu.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v24, v8
+; CHECK-NEXT: vwaddu.vv v0, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT: vredsum.vs v8, v0, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <128 x i8>, <128 x i8>* %x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v10
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v10
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v12
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v12
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vadd.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v8, v16
+; CHECK-NEXT: vredsum.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <64 x i32>, <64 x i32>* %x
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vwadd.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v24, v8
+; CHECK-NEXT: vwadd.vv v0, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vredsum.vs v8, v0, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <64 x i16>, <64 x i16>* %x
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vwaddu.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v24, v8
+; CHECK-NEXT: vwaddu.vv v0, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vredsum.vs v8, v0, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <64 x i16>, <64 x i16>* %x
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v9
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v9
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v10
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v10
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v12
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v12
; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vwadd.vv v24, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v8, zero
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredsum.vs v8, v24, v8
; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vwaddu.vv v24, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v8, zero
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredsum.vs v8, v24, v8
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v10
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v12, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v12
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v16
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
; CHECK-NEXT: vand.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v16
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v10
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v12, -1
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v12
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v16
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
; CHECK-NEXT: vand.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v16
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v10
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v12, -1
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v12
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v16
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
; CHECK-NEXT: vand.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v16
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v9, -1
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v10
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v12
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v16, -1
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v16
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle64.v v16, (a0)
; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v16, -1
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v16
; RV64-NEXT: vand.vv v16, v24, v16
; RV64-NEXT: vand.vv v8, v8, v0
; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v16, -1
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v16
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vredor.vs v8, v8, v16
+; CHECK-NEXT: vredor.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <256 x i8>, <256 x i8>* %x
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vredor.vs v8, v8, v16
+; CHECK-NEXT: vredor.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <128 x i16>, <128 x i16>* %x
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vredor.vs v8, v8, v16
+; CHECK-NEXT: vredor.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <64 x i32>, <64 x i32>* %x
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vxor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vredxor.vs v8, v8, v16
+; CHECK-NEXT: vredxor.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <256 x i8>, <256 x i8>* %x
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vxor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vredxor.vs v8, v8, v16
+; CHECK-NEXT: vredxor.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <128 x i16>, <128 x i16>* %x
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vxor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vredxor.vs v8, v8, v16
+; CHECK-NEXT: vredxor.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <64 x i32>, <64 x i32>* %x
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
-; CHECK-NEXT: vmin.vv v8, v8, v16
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vredmin.vs v8, v8, v16
+; CHECK-NEXT: vmv.s.x v24, a0
+; CHECK-NEXT: vmin.vv v8, v8, v16
+; CHECK-NEXT: vredmin.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <256 x i8>, <256 x i8>* %x
; RV32-NEXT: vle16.v v8, (a0)
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vmv.s.x v12, a0
-; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v12
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64-NEXT: vle16.v v8, (a0)
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, a0
-; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV32-NEXT: vle16.v v8, (a0)
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vmv.s.x v16, a0
-; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v16
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64-NEXT: vle16.v v8, (a0)
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vmv.s.x v16, a0
-; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v16
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV32-NEXT: vle16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 128
; RV32-NEXT: vle16.v v16, (a0)
-; RV32-NEXT: vmin.vv v8, v8, v16
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v16, a0
-; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; RV32-NEXT: vredmin.vs v8, v8, v16
+; RV32-NEXT: vmv.s.x v24, a0
+; RV32-NEXT: vmin.vv v8, v8, v16
+; RV32-NEXT: vredmin.vs v8, v8, v24
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
;
; RV64-NEXT: vle16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle16.v v16, (a0)
-; RV64-NEXT: vmin.vv v8, v8, v16
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v16, a0
-; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; RV64-NEXT: vredmin.vs v8, v8, v16
+; RV64-NEXT: vmv.s.x v24, a0
+; RV64-NEXT: vmin.vv v8, v8, v16
+; RV64-NEXT: vredmin.vs v8, v8, v24
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
%v = load <128 x i16>, <128 x i16>* %x
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v16, a0
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v16
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v16, a0
-; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v16
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: addi a0, a0, 128
; RV32-NEXT: vle32.v v16, (a0)
-; RV32-NEXT: vmin.vv v8, v8, v16
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v16, a0
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; RV32-NEXT: vredmin.vs v8, v8, v16
+; RV32-NEXT: vmv.s.x v24, a0
+; RV32-NEXT: vmin.vv v8, v8, v16
+; RV32-NEXT: vredmin.vs v8, v8, v24
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
;
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle32.v v16, (a0)
-; RV64-NEXT: vmin.vv v8, v8, v16
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v16, a0
-; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; RV64-NEXT: vredmin.vs v8, v8, v16
+; RV64-NEXT: vmv.s.x v24, a0
+; RV64-NEXT: vmin.vv v8, v8, v16
+; RV64-NEXT: vredmin.vs v8, v8, v24
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
%v = load <64 x i32>, <64 x i32>* %x
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
-; CHECK-NEXT: vmax.vv v8, v8, v16
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vredmax.vs v8, v8, v16
+; CHECK-NEXT: vmv.s.x v24, a0
+; CHECK-NEXT: vmax.vv v8, v8, v16
+; CHECK-NEXT: vredmax.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <256 x i8>, <256 x i8>* %x
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
-; CHECK-NEXT: vmax.vv v8, v8, v16
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vredmax.vs v8, v8, v16
+; CHECK-NEXT: vmv.s.x v24, a0
+; CHECK-NEXT: vmax.vv v8, v8, v16
+; CHECK-NEXT: vredmax.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <128 x i16>, <128 x i16>* %x
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
-; CHECK-NEXT: vmax.vv v8, v8, v16
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vredmax.vs v8, v8, v16
+; CHECK-NEXT: vmv.s.x v24, a0
+; CHECK-NEXT: vmax.vv v8, v8, v16
+; CHECK-NEXT: vredmax.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <64 x i32>, <64 x i32>* %x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v10
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v12, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v12
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v16
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
; CHECK-NEXT: vminu.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v16
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v10
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v12, -1
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v12
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v16
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
; CHECK-NEXT: vminu.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v16
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v10
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v12, -1
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v12
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v16
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
; CHECK-NEXT: vminu.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v16
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v9, -1
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v10
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v12
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v16, -1
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v16
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle64.v v16, (a0)
; RV64-NEXT: vminu.vv v8, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v16, -1
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v16
; RV64-NEXT: vminu.vv v16, v24, v16
; RV64-NEXT: vminu.vv v8, v8, v0
; RV64-NEXT: vminu.vv v8, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v16, -1
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v16
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vmaxu.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vredmaxu.vs v8, v8, v16
+; CHECK-NEXT: vredmaxu.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <256 x i8>, <256 x i8>* %x
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vmaxu.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vredmaxu.vs v8, v8, v16
+; CHECK-NEXT: vredmaxu.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <128 x i16>, <128 x i16>* %x
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vmaxu.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vredmaxu.vs v8, v8, v16
+; CHECK-NEXT: vredmaxu.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <64 x i32>, <64 x i32>* %x
define i64 @reduce_add(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_add:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v10
define i64 @reduce_add2(<4 x i64> %v) {
; CHECK-LABEL: reduce_add2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v10
define i64 @reduce_and(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_and:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v10
define i64 @reduce_and2(<4 x i64> %v) {
; CHECK-LABEL: reduce_and2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v10
define i64 @reduce_or(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_or:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v10
define i64 @reduce_or2(<4 x i64> %v) {
; CHECK-LABEL: reduce_or2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v10
define i64 @reduce_xor(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_xor:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v10
define i64 @reduce_xor2(<4 x i64> %v) {
; CHECK-LABEL: reduce_xor2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v10
define i64 @reduce_umax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umax:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v10
define i64 @reduce_umax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umax2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v10
define i64 @reduce_umin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umin:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v10
define i64 @reduce_umin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umin2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v10
define i64 @reduce_smax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smax:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v10
define i64 @reduce_smax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smax2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v10
define i64 @reduce_smin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smin:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v10
define i64 @reduce_smin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smin2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v10
define float @reduce_fadd(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @reduce_fadd2(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @reduce_fmax(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmax:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @reduce_fmin(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmin:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv1f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv1f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv2f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_ord_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv4f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @vreduce_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
define float @vreduce_ord_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @vreduce_ord_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @vreduce_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
define float @vreduce_ord_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v10
define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v10
define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define double @vreduce_ord_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define double @vreduce_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
define double @vreduce_ord_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v10
define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v10
define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v12
define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v12
define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v10
define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v10
; CHECK-LABEL: vreduce_fmin_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI30_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI30_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI31_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI31_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI32_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI32_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI32_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI33_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI33_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI33_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI34_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI34_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI34_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-LABEL: vreduce_fmin_nxv64f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI35_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI35_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v16, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI35_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI36_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI36_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI36_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI37_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI37_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI37_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI38_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI38_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI38_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI39_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI39_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI39_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI40_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI40_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v10, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI40_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-LABEL: vreduce_fmin_nxv32f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI41_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI41_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v16, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI41_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI42_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI42_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI42_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI43_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI43_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI43_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI44_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI44_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI44_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI45_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI45_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI45_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-LABEL: vreduce_fmin_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI46_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v12, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI46_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-LABEL: vreduce_fmin_nxv16f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI47_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI47_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v16, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI47_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI48_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI48_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI48_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI49_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI49_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI49_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI50_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI50_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI50_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI51_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI51_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI51_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI52_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI52_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI52_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-LABEL: vreduce_fmax_nxv64f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI53_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI53_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v16, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI53_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI54_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI54_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI54_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI55_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI55_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI55_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI56_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI56_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI56_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI57_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI57_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI57_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI58_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI58_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v10, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI58_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-LABEL: vreduce_fmax_nxv32f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI59_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI59_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v16, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI59_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI60_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI60_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI60_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI61_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI61_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI61_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI62_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI62_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI62_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI63_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI63_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI63_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-LABEL: vreduce_fmax_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI64_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI64_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v12, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI64_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-LABEL: vreduce_fmax_nxv16f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI65_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI65_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v16, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI65_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_nsz_fadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vfmv.v.f v9, ft0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vfmv.v.f v10, ft0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v9, v10, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v10
; CHECK-NEXT: vslideup.vi v11, v12, 0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v11, v12, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v12
define half @vreduce_ord_fadd_nxv12f16(<vscale x 12 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv12f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: fmv.h.x ft0, zero
; CHECK-NEXT: fneg.h ft0, ft0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.v.f v11, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v12
; CHECK-NEXT: vfmv.v.f v9, ft0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vfmv.v.f v10, ft0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v9, v10, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v10
; CHECK-NEXT: vslideup.vi v11, v12, 0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v11, v12, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v12
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI74_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI74_0)(a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vfmv.v.f v11, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v12
define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv1i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smin_nxv1i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smin_nxv2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smin_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vwreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vwreduce_add_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i16 @vwreduce_uadd_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv1i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vwreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vwreduce_add_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i16 @vwreduce_uadd_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vwreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vwreduce_add_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i16 @vwreduce_uadd_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vwreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i32 @vwreduce_uadd_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv1i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vwreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i32 @vwreduce_uadd_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v10
define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v10
; CHECK-LABEL: vreduce_smax_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v10
define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v10
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v10
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v10
define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v10
define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v10
define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v10
;
; RV64-LABEL: vreduce_add_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredsum.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
;
; RV64-LABEL: vwreduce_add_nxv1i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v9
; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
;
; RV64-LABEL: vwreduce_uadd_nxv1i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v9
; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
;
; RV64-LABEL: vreduce_umax_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredmaxu.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vredmax.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
;
; RV64-LABEL: vreduce_umin_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vredminu.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
;
; RV64-LABEL: vreduce_and_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vredand.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
;
; RV64-LABEL: vreduce_or_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
;
; RV64-LABEL: vreduce_xor_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
;
; RV64-LABEL: vreduce_add_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredsum.vs v8, v8, v10
;
; RV64-LABEL: vwreduce_add_nxv2i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v9
;
; RV64-LABEL: vwreduce_uadd_nxv2i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v9
;
; RV64-LABEL: vreduce_umax_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredmaxu.vs v8, v8, v10
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredmax.vs v8, v8, v10
;
; RV64-LABEL: vreduce_umin_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v10
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v10
;
; RV64-LABEL: vreduce_and_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v10
;
; RV64-LABEL: vreduce_or_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredor.vs v8, v8, v10
;
; RV64-LABEL: vreduce_xor_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v10
;
; RV64-LABEL: vreduce_add_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredsum.vs v8, v8, v12
;
; RV64-LABEL: vwreduce_add_nxv4i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v10
;
; RV64-LABEL: vwreduce_uadd_nxv4i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v10
;
; RV64-LABEL: vreduce_umax_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredmaxu.vs v8, v8, v12
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, a0
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredmax.vs v8, v8, v12
;
; RV64-LABEL: vreduce_umin_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v12
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, a0
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v12
;
; RV64-LABEL: vreduce_and_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v12
;
; RV64-LABEL: vreduce_or_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredor.vs v8, v8, v12
;
; RV64-LABEL: vreduce_xor_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v12