const MVT VecVT = Vec.getSimpleValueType();
const MVT M1VT = getLMUL1VT(VecVT);
const MVT XLenVT = Subtarget.getXLenVT();
+ const bool NonZeroAVL = hasNonZeroAVL(VL);
// The reduction needs an LMUL1 input; do the splat at either LMUL1
// or the original VT if fractional.
auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
- SDValue InitialValue =
- lowerScalarInsert(StartValue, DAG.getConstant(1, DL, XLenVT),
- InnerVT, DL, DAG, Subtarget);
+ // We reuse the VL of the reduction to reduce vsetvli toggles if we can
+ // prove it is non-zero. For the AVL=0 case, we need the scalar to
+ // be the result of the reduction operation.
+ auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
+ SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
+ DAG, Subtarget);
if (M1VT != InnerVT)
InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT,
DAG.getUNDEF(M1VT),
InitialValue, DAG.getConstant(0, DL, XLenVT));
- SDValue PassThru = hasNonZeroAVL(VL) ? DAG.getUNDEF(M1VT) : InitialValue;
+ SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, PassThru, Vec,
InitialValue, Mask, VL);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
                     DAG.getConstant(0, DL, XLenVT));
  if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
return SDValue();
- if (!isOneConstant(ScalarV.getOperand(2)))
+ if (!hasNonZeroAVL(ScalarV.getOperand(2)))
return SDValue();
  // Check that the scalar of ScalarV is the neutral element.
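// Note: the combine above relies on a hasNonZeroAVL(SDValue) helper that can
// prove the AVL operand is non-zero. A minimal sketch of what such a helper
// could check (an assumption for illustration, not taken verbatim from the
// patch): the AVL is non-zero when it is either the sentinel X0 register
// (meaning VLMAX) or an immediate constant >= 1.
static bool hasNonZeroAVL(SDValue AVL) {
  // Register AVL: X0 encodes VLMAX, which is never zero for a legal type.
  auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
  // Immediate AVL: any constant >= 1 is trivially non-zero.
  auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
  return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
         (ImmAVL && ImmAVL->getZExtValue() >= 1);
}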
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s
declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>)
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vfadd.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vfredusum.vs v8, v8, v16
+; CHECK-NEXT: vfredusum.vs v8, v8, v24
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <128 x half>, <128 x half>* %x
define half @vreduce_ord_fadd_v128f16(<128 x half>* %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v128f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: li a2, 64
-; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: li a1, 64
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v24, fa0
-; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-NEXT: vfredosum.vs v16, v16, v24
-; CHECK-NEXT: vfmv.f.s ft0, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v16, ft0
-; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-NEXT: vfredosum.vs v8, v8, v16
+; CHECK-NEXT: vfredosum.vs v8, v8, v24
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: vfmv.s.f v8, ft0
+; CHECK-NEXT: vfredosum.vs v8, v16, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <128 x half>, <128 x half>* %x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v10
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v10
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v12
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v12
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vfadd.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v16, fa0
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vfredusum.vs v8, v8, v16
+; CHECK-NEXT: vfredusum.vs v8, v8, v24
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <64 x float>, <64 x float>* %x
define float @vreduce_ord_fadd_v64f32(<64 x float>* %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v64f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: li a2, 32
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: vle32.v v8, (a1)
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v24, fa0
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: vfredosum.vs v16, v16, v24
-; CHECK-NEXT: vfmv.f.s ft0, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v16, ft0
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: vfredosum.vs v8, v8, v16
+; CHECK-NEXT: vfredosum.vs v8, v8, v24
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: vfmv.s.f v8, ft0
+; CHECK-NEXT: vfredosum.vs v8, v16, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <64 x float>, <64 x float>* %x
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwadd.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v8, fa0
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vfredusum.vs v8, v24, v8
+; CHECK-NEXT: vfwadd.vv v0, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfredusum.vs v8, v0, v24
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <64 x half>, <64 x half>* %x
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v16, a0
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v16, v16, v24
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s ft0, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v16
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v10
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v10
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v12
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v12
; CHECK-NEXT: vslidedown.vi v16, v8, 16
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vfwadd.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v24, v8
; CHECK-NEXT: vle32.v v16, (a0)
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v16, 16
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v16, v16, v24
; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s ft0, v16
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v16
declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
define half @vreduce_fmin_v2f16(<2 x half>* %x) {
-; CHECK-LABEL: vreduce_fmin_v2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI68_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI68_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v2f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI68_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI68_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v2f16:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI68_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI68_0)(a1)
+; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <2 x half>, <2 x half>* %x
%red = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> %v)
ret half %red
declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
define half @vreduce_fmin_v4f16(<4 x half>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI69_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI69_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI69_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI69_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f16:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI69_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI69_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
%red = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v)
ret half %red
}
define half @vreduce_fmin_v4f16_nonans(<4 x half>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f16_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI70_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI70_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f16_nonans:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI70_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI70_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f16_nonans:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI70_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI70_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
%red = call nnan half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v)
ret half %red
}
define half @vreduce_fmin_v4f16_nonans_noinfs(<4 x half>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f16_nonans_noinfs:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI71_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI71_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f16_nonans_noinfs:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI71_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI71_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f16_nonans_noinfs:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI71_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI71_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
%red = call nnan ninf half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v)
ret half %red
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
+; CHECK-NEXT: lui a1, %hi(.LCPI72_0)
+; CHECK-NEXT: flh ft0, %lo(.LCPI72_0)(a1)
; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vfmv.s.f v24, ft0
; CHECK-NEXT: vfmin.vv v8, v8, v16
-; CHECK-NEXT: lui a0, %hi(.LCPI72_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI72_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v16, (a0), zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v16
+; CHECK-NEXT: vfredmin.vs v8, v8, v24
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <128 x half>, <128 x half>* %x
declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
define float @vreduce_fmin_v2f32(<2 x float>* %x) {
-; CHECK-LABEL: vreduce_fmin_v2f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI73_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI73_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v2f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI73_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI73_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v2f32:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI73_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI73_0)(a1)
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <2 x float>, <2 x float>* %x
%red = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %v)
ret float %red
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
define float @vreduce_fmin_v4f32(<4 x float>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI74_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI74_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI74_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI74_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f32:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI74_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI74_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x float>, <4 x float>* %x
%red = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
ret float %red
}
define float @vreduce_fmin_v4f32_nonans(<4 x float>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f32_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI75_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI75_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f32_nonans:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI75_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI75_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f32_nonans:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI75_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI75_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x float>, <4 x float>* %x
%red = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
ret float %red
}
define float @vreduce_fmin_v4f32_nonans_noinfs(<4 x float>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f32_nonans_noinfs:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI76_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI76_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f32_nonans_noinfs:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI76_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI76_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f32_nonans_noinfs:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI76_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI76_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x float>, <4 x float>* %x
%red = call nnan ninf float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
ret float %red
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: addi a2, a0, 384
-; CHECK-NEXT: vle32.v v16, (a2)
-; CHECK-NEXT: addi a2, a0, 256
+; CHECK-NEXT: addi a1, a0, 384
+; CHECK-NEXT: vle32.v v16, (a1)
+; CHECK-NEXT: addi a1, a0, 256
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v24, (a0)
-; CHECK-NEXT: vle32.v v0, (a2)
+; CHECK-NEXT: vle32.v v0, (a1)
+; CHECK-NEXT: lui a0, %hi(.LCPI77_0)
+; CHECK-NEXT: flw ft0, %lo(.LCPI77_0)(a0)
; CHECK-NEXT: vfmin.vv v16, v24, v16
; CHECK-NEXT: vfmin.vv v8, v8, v0
; CHECK-NEXT: vfmin.vv v8, v8, v16
-; CHECK-NEXT: lui a0, %hi(.LCPI77_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI77_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v16, (a0), zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
define double @vreduce_fmin_v2f64(<2 x double>* %x) {
-; CHECK-LABEL: vreduce_fmin_v2f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI78_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI78_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v2f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI78_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI78_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v2f64:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI78_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI78_0)(a1)
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <2 x double>, <2 x double>* %x
%red = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %v)
ret double %red
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
define double @vreduce_fmin_v4f64(<4 x double>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI79_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI79_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v10
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI79_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI79_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v10, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v10
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f64:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI79_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI79_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v10, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v10
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
%red = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
ret double %red
}
define double @vreduce_fmin_v4f64_nonans(<4 x double>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f64_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI80_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI80_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v10
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f64_nonans:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI80_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI80_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v10, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v10
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f64_nonans:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI80_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI80_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v10, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v10
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
%red = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
ret double %red
}
define double @vreduce_fmin_v4f64_nonans_noinfs(<4 x double>* %x) {
-; CHECK-LABEL: vreduce_fmin_v4f64_nonans_noinfs:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI81_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI81_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v10
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v4f64_nonans_noinfs:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI81_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI81_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v10, ft0
+; RV32-NEXT: vfredmin.vs v8, v8, v10
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v4f64_nonans_noinfs:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI81_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI81_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v10, ft0
+; RV64-NEXT: vfredmin.vs v8, v8, v10
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
%red = call nnan ninf double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
ret double %red
declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>)
define double @vreduce_fmin_v32f64(<32 x double>* %x) {
-; CHECK-LABEL: vreduce_fmin_v32f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: vle64.v v16, (a0)
-; CHECK-NEXT: vfmin.vv v8, v8, v16
-; CHECK-NEXT: lui a0, %hi(.LCPI82_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI82_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v16, (a0), zero
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vfredmin.vs v8, v8, v16
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmin_v32f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: lui a1, %hi(.LCPI82_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI82_0)(a1)
+; RV32-NEXT: vle64.v v16, (a0)
+; RV32-NEXT: vfmv.s.f v24, ft0
+; RV32-NEXT: vfmin.vv v8, v8, v16
+; RV32-NEXT: vfredmin.vs v8, v8, v24
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmin_v32f64:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI82_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI82_0)(a1)
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vle64.v v16, (a0)
+; RV64-NEXT: vfmv.s.f v24, ft0
+; RV64-NEXT: vfmin.vv v8, v8, v16
+; RV64-NEXT: vfredmin.vs v8, v8, v24
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <32 x double>, <32 x double>* %x
%red = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %v)
ret double %red
declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
define half @vreduce_fmax_v2f16(<2 x half>* %x) {
-; CHECK-LABEL: vreduce_fmax_v2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI83_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI83_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v2f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI83_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI83_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v2f16:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI83_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI83_0)(a1)
+; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <2 x half>, <2 x half>* %x
%red = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> %v)
ret half %red
declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
define half @vreduce_fmax_v4f16(<4 x half>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI84_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI84_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI84_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI84_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f16:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI84_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI84_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
%red = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v)
ret half %red
}
define half @vreduce_fmax_v4f16_nonans(<4 x half>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f16_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI85_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI85_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f16_nonans:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI85_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI85_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f16_nonans:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI85_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI85_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
%red = call nnan half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v)
ret half %red
}
define half @vreduce_fmax_v4f16_nonans_noinfs(<4 x half>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f16_nonans_noinfs:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI86_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI86_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f16_nonans_noinfs:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI86_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI86_0)(a1)
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f16_nonans_noinfs:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI86_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI86_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x half>, <4 x half>* %x
%red = call nnan ninf half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v)
ret half %red
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
+; CHECK-NEXT: lui a1, %hi(.LCPI87_0)
+; CHECK-NEXT: flh ft0, %lo(.LCPI87_0)(a1)
; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vfmv.s.f v24, ft0
; CHECK-NEXT: vfmax.vv v8, v8, v16
-; CHECK-NEXT: lui a0, %hi(.LCPI87_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI87_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v16, (a0), zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v16
+; CHECK-NEXT: vfredmax.vs v8, v8, v24
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <128 x half>, <128 x half>* %x
declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
define float @vreduce_fmax_v2f32(<2 x float>* %x) {
-; CHECK-LABEL: vreduce_fmax_v2f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI88_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI88_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v2f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI88_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI88_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v2f32:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI88_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI88_0)(a1)
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <2 x float>, <2 x float>* %x
%red = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %v)
ret float %red
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
define float @vreduce_fmax_v4f32(<4 x float>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI89_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI89_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI89_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI89_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f32:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI89_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI89_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x float>, <4 x float>* %x
%red = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
ret float %red
}
define float @vreduce_fmax_v4f32_nonans(<4 x float>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f32_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI90_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI90_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f32_nonans:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI90_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI90_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f32_nonans:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI90_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI90_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x float>, <4 x float>* %x
%red = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
ret float %red
}
define float @vreduce_fmax_v4f32_nonans_noinfs(<4 x float>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f32_nonans_noinfs:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI91_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI91_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f32_nonans_noinfs:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI91_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI91_0)(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f32_nonans_noinfs:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI91_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI91_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x float>, <4 x float>* %x
%red = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
ret float %red
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: addi a2, a0, 384
-; CHECK-NEXT: vle32.v v16, (a2)
-; CHECK-NEXT: addi a2, a0, 256
+; CHECK-NEXT: addi a1, a0, 384
+; CHECK-NEXT: vle32.v v16, (a1)
+; CHECK-NEXT: addi a1, a0, 256
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v24, (a0)
-; CHECK-NEXT: vle32.v v0, (a2)
+; CHECK-NEXT: vle32.v v0, (a1)
+; CHECK-NEXT: lui a0, %hi(.LCPI92_0)
+; CHECK-NEXT: flw ft0, %lo(.LCPI92_0)(a0)
; CHECK-NEXT: vfmax.vv v16, v24, v16
; CHECK-NEXT: vfmax.vv v8, v8, v0
; CHECK-NEXT: vfmax.vv v8, v8, v16
-; CHECK-NEXT: lui a0, %hi(.LCPI92_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI92_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v16, (a0), zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
define double @vreduce_fmax_v2f64(<2 x double>* %x) {
-; CHECK-LABEL: vreduce_fmax_v2f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI93_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI93_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v2f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI93_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI93_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v9, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v9
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v2f64:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI93_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI93_0)(a1)
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v9, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v9
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <2 x double>, <2 x double>* %x
%red = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %v)
ret double %red
declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
define double @vreduce_fmax_v4f64(<4 x double>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI94_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI94_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v10
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI94_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI94_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v10, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v10
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f64:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI94_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI94_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v10, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v10
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
%red = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v)
ret double %red
}
define double @vreduce_fmax_v4f64_nonans(<4 x double>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f64_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI95_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI95_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v10
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f64_nonans:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI95_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI95_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v10, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v10
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f64_nonans:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI95_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI95_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v10, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v10
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
%red = call nnan double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v)
ret double %red
}
define double @vreduce_fmax_v4f64_nonans_noinfs(<4 x double>* %x) {
-; CHECK-LABEL: vreduce_fmax_v4f64_nonans_noinfs:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI96_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI96_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v10
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v4f64_nonans_noinfs:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: lui a1, %hi(.LCPI96_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI96_0)(a1)
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vfmv.s.f v10, ft0
+; RV32-NEXT: vfredmax.vs v8, v8, v10
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v4f64_nonans_noinfs:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI96_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI96_0)(a1)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vfmv.s.f v10, ft0
+; RV64-NEXT: vfredmax.vs v8, v8, v10
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <4 x double>, <4 x double>* %x
%red = call nnan ninf double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v)
ret double %red
declare double @llvm.vector.reduce.fmax.v32f64(<32 x double>)
define double @vreduce_fmax_v32f64(<32 x double>* %x) {
-; CHECK-LABEL: vreduce_fmax_v32f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: vle64.v v16, (a0)
-; CHECK-NEXT: vfmax.vv v8, v8, v16
-; CHECK-NEXT: lui a0, %hi(.LCPI97_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI97_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v16, (a0), zero
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vfredmax.vs v8, v8, v16
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fmax_v32f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: lui a1, %hi(.LCPI97_0)
+; RV32-NEXT: fld ft0, %lo(.LCPI97_0)(a1)
+; RV32-NEXT: vle64.v v16, (a0)
+; RV32-NEXT: vfmv.s.f v24, ft0
+; RV32-NEXT: vfmax.vv v8, v8, v16
+; RV32-NEXT: vfredmax.vs v8, v8, v24
+; RV32-NEXT: vfmv.f.s fa0, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmax_v32f64:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI97_0)
+; RV64-NEXT: fld ft0, %lo(.LCPI97_0)(a1)
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vle64.v v16, (a0)
+; RV64-NEXT: vfmv.s.f v24, ft0
+; RV64-NEXT: vfmax.vv v8, v8, v16
+; RV64-NEXT: vfredmax.vs v8, v8, v24
+; RV64-NEXT: vfmv.f.s fa0, v8
+; RV64-NEXT: ret
%v = load <32 x double>, <32 x double>* %x
%red = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %v)
ret double %red
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vadd.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v8, v16
+; CHECK-NEXT: vredsum.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <256 x i8>, <256 x i8>* %x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v10
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v10
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v12
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v12
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vadd.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v8, v16
+; CHECK-NEXT: vredsum.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <128 x i16>, <128 x i16>* %x
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwadd.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v24, v8
+; CHECK-NEXT: vwadd.vv v0, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT: vredsum.vs v8, v0, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <128 x i8>, <128 x i8>* %x
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vwaddu.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v24, v8
+; CHECK-NEXT: vwaddu.vv v0, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT: vredsum.vs v8, v0, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <128 x i8>, <128 x i8>* %x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v10
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v10
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v12
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v12
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vadd.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v8, v16
+; CHECK-NEXT: vredsum.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <64 x i32>, <64 x i32>* %x
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vwadd.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v24, v8
+; CHECK-NEXT: vwadd.vv v0, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vredsum.vs v8, v0, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <64 x i16>, <64 x i16>* %x
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vwaddu.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vredsum.vs v8, v24, v8
+; CHECK-NEXT: vwaddu.vv v0, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vredsum.vs v8, v0, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <64 x i16>, <64 x i16>* %x
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v9
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v9
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v10
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v10
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v12
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v12
; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vwadd.vv v24, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v8, zero
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredsum.vs v8, v24, v8
; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vwaddu.vv v24, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v8, zero
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredsum.vs v8, v24, v8
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v10
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v12, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v12
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v16
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
; CHECK-NEXT: vand.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v16
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v10
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v12, -1
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v12
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v16
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
; CHECK-NEXT: vand.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v16
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v10
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v12, -1
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v12
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v16
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
; CHECK-NEXT: vand.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v16
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v9, -1
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v10
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v12
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v16, -1
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v16
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle64.v v16, (a0)
; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v16, -1
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v16
; RV64-NEXT: vand.vv v16, v24, v16
; RV64-NEXT: vand.vv v8, v8, v0
; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v16, -1
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v16
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vredor.vs v8, v8, v16
+; CHECK-NEXT: vredor.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <256 x i8>, <256 x i8>* %x
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vredor.vs v8, v8, v16
+; CHECK-NEXT: vredor.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <128 x i16>, <128 x i16>* %x
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vredor.vs v8, v8, v16
+; CHECK-NEXT: vredor.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <64 x i32>, <64 x i32>* %x
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vxor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vredxor.vs v8, v8, v16
+; CHECK-NEXT: vredxor.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <256 x i8>, <256 x i8>* %x
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vxor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vredxor.vs v8, v8, v16
+; CHECK-NEXT: vredxor.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <128 x i16>, <128 x i16>* %x
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vxor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vredxor.vs v8, v8, v16
+; CHECK-NEXT: vredxor.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <64 x i32>, <64 x i32>* %x
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
-; CHECK-NEXT: vmin.vv v8, v8, v16
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vredmin.vs v8, v8, v16
+; CHECK-NEXT: vmv.s.x v24, a0
+; CHECK-NEXT: vmin.vv v8, v8, v16
+; CHECK-NEXT: vredmin.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <256 x i8>, <256 x i8>* %x
; RV32-NEXT: vle16.v v8, (a0)
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vmv.s.x v12, a0
-; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v12
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64-NEXT: vle16.v v8, (a0)
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, a0
-; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV32-NEXT: vle16.v v8, (a0)
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vmv.s.x v16, a0
-; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v16
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64-NEXT: vle16.v v8, (a0)
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vmv.s.x v16, a0
-; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v16
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV32-NEXT: vle16.v v8, (a0)
; RV32-NEXT: addi a0, a0, 128
; RV32-NEXT: vle16.v v16, (a0)
-; RV32-NEXT: vmin.vv v8, v8, v16
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v16, a0
-; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; RV32-NEXT: vredmin.vs v8, v8, v16
+; RV32-NEXT: vmv.s.x v24, a0
+; RV32-NEXT: vmin.vv v8, v8, v16
+; RV32-NEXT: vredmin.vs v8, v8, v24
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
;
; RV64-NEXT: vle16.v v8, (a0)
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle16.v v16, (a0)
-; RV64-NEXT: vmin.vv v8, v8, v16
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v16, a0
-; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; RV64-NEXT: vredmin.vs v8, v8, v16
+; RV64-NEXT: vmv.s.x v24, a0
+; RV64-NEXT: vmin.vv v8, v8, v16
+; RV64-NEXT: vredmin.vs v8, v8, v24
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
%v = load <128 x i16>, <128 x i16>* %x
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v16, a0
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v16
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v16, a0
-; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v16
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: addi a0, a0, 128
; RV32-NEXT: vle32.v v16, (a0)
-; RV32-NEXT: vmin.vv v8, v8, v16
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v16, a0
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; RV32-NEXT: vredmin.vs v8, v8, v16
+; RV32-NEXT: vmv.s.x v24, a0
+; RV32-NEXT: vmin.vv v8, v8, v16
+; RV32-NEXT: vredmin.vs v8, v8, v24
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
;
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle32.v v16, (a0)
-; RV64-NEXT: vmin.vv v8, v8, v16
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v16, a0
-; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; RV64-NEXT: vredmin.vs v8, v8, v16
+; RV64-NEXT: vmv.s.x v24, a0
+; RV64-NEXT: vmin.vv v8, v8, v16
+; RV64-NEXT: vredmin.vs v8, v8, v24
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
%v = load <64 x i32>, <64 x i32>* %x
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
-; CHECK-NEXT: vmax.vv v8, v8, v16
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vredmax.vs v8, v8, v16
+; CHECK-NEXT: vmv.s.x v24, a0
+; CHECK-NEXT: vmax.vv v8, v8, v16
+; CHECK-NEXT: vredmax.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <256 x i8>, <256 x i8>* %x
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
-; CHECK-NEXT: vmax.vv v8, v8, v16
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vredmax.vs v8, v8, v16
+; CHECK-NEXT: vmv.s.x v24, a0
+; CHECK-NEXT: vmax.vv v8, v8, v16
+; CHECK-NEXT: vredmax.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <128 x i16>, <128 x i16>* %x
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
-; CHECK-NEXT: vmax.vv v8, v8, v16
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vredmax.vs v8, v8, v16
+; CHECK-NEXT: vmv.s.x v24, a0
+; CHECK-NEXT: vmax.vv v8, v8, v16
+; CHECK-NEXT: vredmax.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <64 x i32>, <64 x i32>* %x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v10
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v12, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v12
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v16
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
; CHECK-NEXT: vminu.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v16
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v10
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v12, -1
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v12
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v16
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
; CHECK-NEXT: vminu.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v16
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v10
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v12, -1
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v12
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v16
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
; CHECK-NEXT: vminu.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v16, -1
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v16
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v9, -1
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v10
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v12
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v16, -1
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v16
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle64.v v16, (a0)
; RV64-NEXT: vminu.vv v8, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v16, -1
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v16
; RV64-NEXT: vminu.vv v16, v24, v16
; RV64-NEXT: vminu.vv v8, v8, v0
; RV64-NEXT: vminu.vv v8, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v16, -1
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v16
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vmaxu.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vredmaxu.vs v8, v8, v16
+; CHECK-NEXT: vredmaxu.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <256 x i8>, <256 x i8>* %x
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vmaxu.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vredmaxu.vs v8, v8, v16
+; CHECK-NEXT: vredmaxu.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <128 x i16>, <128 x i16>* %x
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v24, zero
; CHECK-NEXT: vmaxu.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vredmaxu.vs v8, v8, v16
+; CHECK-NEXT: vredmaxu.vs v8, v8, v24
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <64 x i32>, <64 x i32>* %x
define i64 @reduce_add(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_add:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v10
define i64 @reduce_add2(<4 x i64> %v) {
; CHECK-LABEL: reduce_add2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v10
define i64 @reduce_and(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_and:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v10
define i64 @reduce_and2(<4 x i64> %v) {
; CHECK-LABEL: reduce_and2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v10
define i64 @reduce_or(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_or:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v10
define i64 @reduce_or2(<4 x i64> %v) {
; CHECK-LABEL: reduce_or2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v10
define i64 @reduce_xor(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_xor:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v10
define i64 @reduce_xor2(<4 x i64> %v) {
; CHECK-LABEL: reduce_xor2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v10
define i64 @reduce_umax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umax:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v10
define i64 @reduce_umax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umax2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v10
define i64 @reduce_umin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umin:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v10
define i64 @reduce_umin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umin2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v10
define i64 @reduce_smax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smax:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v10
define i64 @reduce_smax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smax2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v10
define i64 @reduce_smin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smin:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v10
define i64 @reduce_smin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smin2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v10
define float @reduce_fadd(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @reduce_fadd2(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @reduce_fmax(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmax:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @reduce_fmin(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmin:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv1f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv1f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv2f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_ord_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv4f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @vreduce_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
define float @vreduce_ord_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @vreduce_ord_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @vreduce_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
define float @vreduce_ord_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v10
define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v10
define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define double @vreduce_ord_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define double @vreduce_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
define double @vreduce_ord_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v10
define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v10
define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v12
define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v12
define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v10
define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v10
; CHECK-LABEL: vreduce_fmin_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI30_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI30_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI31_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI31_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI32_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI32_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI32_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI33_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI33_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI33_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI34_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI34_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI34_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-LABEL: vreduce_fmin_nxv64f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI35_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI35_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v16, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI35_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI36_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI36_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI36_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI37_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI37_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI37_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI38_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI38_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI38_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI39_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI39_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI39_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI40_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI40_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v10, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI40_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-LABEL: vreduce_fmin_nxv32f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI41_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI41_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v16, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI41_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI42_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI42_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI42_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI43_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI43_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI43_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI44_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI44_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI44_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmin_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI45_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI45_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI45_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-LABEL: vreduce_fmin_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI46_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v12, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI46_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-LABEL: vreduce_fmin_nxv16f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI47_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI47_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v16, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI47_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI48_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI48_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI48_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI49_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI49_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI49_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI50_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI50_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI50_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI51_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI51_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI51_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI52_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI52_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI52_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-LABEL: vreduce_fmax_nxv64f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI53_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI53_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v16, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI53_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI54_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI54_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI54_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI55_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI55_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI55_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI56_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI56_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI56_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI57_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI57_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI57_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI58_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI58_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v10, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI58_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-LABEL: vreduce_fmax_nxv32f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI59_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI59_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v16, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI59_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI60_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI60_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI60_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI61_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI61_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI61_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI62_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI62_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI62_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_fmax_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI63_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI63_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI63_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-LABEL: vreduce_fmax_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI64_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI64_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v12, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI64_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-LABEL: vreduce_fmax_nxv16f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI65_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI65_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v16, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI65_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_nsz_fadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vfmv.v.f v9, ft0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vfmv.v.f v10, ft0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v9, v10, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v10
; CHECK-NEXT: vslideup.vi v11, v12, 0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v11, v12, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v12
define half @vreduce_ord_fadd_nxv12f16(<vscale x 12 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv12f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: fmv.h.x ft0, zero
; CHECK-NEXT: fneg.h ft0, ft0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.v.f v11, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v12
; CHECK-NEXT: vfmv.v.f v9, ft0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vfmv.v.f v10, ft0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v9, v10, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v10
; CHECK-NEXT: vslideup.vi v11, v12, 0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v11, v12, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v12
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI74_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI74_0)(a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vfmv.v.f v11, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v12
define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv1i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smin_nxv1i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smin_nxv2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smin_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vwreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vwreduce_add_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i16 @vwreduce_uadd_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv1i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vwreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vwreduce_add_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i16 @vwreduce_uadd_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vwreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vwreduce_add_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i16 @vwreduce_uadd_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vwreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i32 @vwreduce_uadd_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv1i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vwreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i32 @vwreduce_uadd_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
define signext i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-LABEL: vreduce_smax_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v10
define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v10
; CHECK-LABEL: vreduce_smax_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v10
define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v10
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v10
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v10
define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v10
define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v10
define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v10
;
; RV64-LABEL: vreduce_add_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredsum.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
;
; RV64-LABEL: vwreduce_add_nxv1i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v9
; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
;
; RV64-LABEL: vwreduce_uadd_nxv1i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v9
; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
;
; RV64-LABEL: vreduce_umax_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredmaxu.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vredmax.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
;
; RV64-LABEL: vreduce_umin_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vredminu.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
;
; RV64-LABEL: vreduce_and_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vredand.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
;
; RV64-LABEL: vreduce_or_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
;
; RV64-LABEL: vreduce_xor_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
;
; RV64-LABEL: vreduce_add_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredsum.vs v8, v8, v10
;
; RV64-LABEL: vwreduce_add_nxv2i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v9
;
; RV64-LABEL: vwreduce_uadd_nxv2i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v9
;
; RV64-LABEL: vreduce_umax_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredmaxu.vs v8, v8, v10
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredmax.vs v8, v8, v10
;
; RV64-LABEL: vreduce_umin_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v10
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v10
;
; RV64-LABEL: vreduce_and_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v10
;
; RV64-LABEL: vreduce_or_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredor.vs v8, v8, v10
;
; RV64-LABEL: vreduce_xor_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v10
;
; RV64-LABEL: vreduce_add_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredsum.vs v8, v8, v12
;
; RV64-LABEL: vwreduce_add_nxv4i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v10
;
; RV64-LABEL: vwreduce_uadd_nxv4i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v10
;
; RV64-LABEL: vreduce_umax_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredmaxu.vs v8, v8, v12
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, a0
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredmax.vs v8, v8, v12
;
; RV64-LABEL: vreduce_umin_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v12
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, a0
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v12
;
; RV64-LABEL: vreduce_and_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v12
;
; RV64-LABEL: vreduce_or_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredor.vs v8, v8, v12
;
; RV64-LABEL: vreduce_xor_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v12
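For context, the scalable-vector integer reduction tests whose checks change above all share the same IR shape: call the target-independent reduce intrinsic (optionally widening first) and return the scalar. The sketch below is illustrative only, not copied from the test file; the function names and signatures match the defines shown above, but the exact bodies in-tree may differ.

define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
  ; Plain reduction: the backend materializes the neutral element with
  ; vmv.s.x/vmv.v.i and feeds it to vredxor.vs, as checked above.
  %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
  ret i32 %red
}

define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
  ; Widening reduction: the zero-extend is folded into vwredsumu.vs.
  %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
  %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
  ret i32 %red
}

declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)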