ret <16 x i8> %v
}
+declare <256 x i8> @llvm.vp.add.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
+
+; 256 x i8 masked vp.add of splat -1 (lowered to vadd.vi): the operation is
+; split into two 128-element m8 halves with %evl clamped per half by the
+; bltu/mv sequences; the high half's mask is loaded with vle1.v from (a0),
+; presumably because the <256 x i1> mask argument is passed indirectly —
+; NOTE(review): confirm against the RVV calling convention. Also note the
+; ".v258i8" intrinsic suffix does not match <256 x i8> (should be .v256i8);
+; LLVM remangles it on load, but the name is a typo.
+define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v258i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 128
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; CHECK-NEXT: vle1.v v25, (a0)
+; CHECK-NEXT: addi a0, a1, -128
+; CHECK-NEXT: vmv1r.v v26, v0
+; CHECK-NEXT: mv a3, zero
+; CHECK-NEXT: bltu a1, a0, .LBB30_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a3, a0
+; CHECK-NEXT: .LBB30_2:
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: bltu a1, a2, .LBB30_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: addi a1, zero, 128
+; CHECK-NEXT: .LBB30_4:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v26
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> undef, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ ret <256 x i8> %v
+}
+
+; Unmasked variant (mask is an all-true splat built in IR): same two-half
+; split as above, but vadd.vi is emitted without the v0.t mask operand.
+define <256 x i8> @vadd_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v258i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -128
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: bltu a0, a1, .LBB31_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: .LBB31_2:
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; CHECK-NEXT: addi a1, zero, 128
+; CHECK-NEXT: vadd.vi v16, v16, -1
+; CHECK-NEXT: bltu a0, a1, .LBB31_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: addi a0, zero, 128
+; CHECK-NEXT: .LBB31_4:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> undef, <256 x i32> zeroinitializer
+ %head = insertelement <256 x i1> undef, i1 true, i32 0
+ %m = shufflevector <256 x i1> %head, <256 x i1> undef, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ ret <256 x i8> %v
+}
+
+; Test splitting when the %evl is a known constant.
+
+; Constant %evl of 129: the low half runs at VL=128 and the high half at VL=1
+; (vsetivli zero, 1), so no runtime clamping branches are needed.
+define <256 x i8> @vadd_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vadd_vi_v258i8_evl129:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 128
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
+; CHECK-NEXT: vle1.v v25, (a0)
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> undef, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129)
+ ret <256 x i8> %v
+}
+
+; FIXME: The upper half is doing nothing.
+
+; Constant %evl of 128: only the low half is live; the high half is still
+; emitted but runs at VL=0 (vsetivli zero, 0), i.e. it does nothing.
+define <256 x i8> @vadd_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vadd_vi_v258i8_evl128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 128
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
+; CHECK-NEXT: vle1.v v25, (a0)
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> undef, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128)
+ ret <256 x i8> %v
+}
+
declare <2 x i16> @llvm.vp.add.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
define <2 x i16> @vadd_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) {
%v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
ret <16 x i64> %v
}
+
+; Test that split-legalization works as expected.
+
+declare <32 x i64> @llvm.vp.add.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32)
+
+; 32 x i64 split into two 16-element m8 halves; the <32 x i1> mask's high half
+; is extracted with vslidedown.vi ..., 2. RV64 folds the -1 splat into
+; vadd.vi; RV32 instead materializes the all-ones vector with an e32 vmv.v.i
+; (presumably because there is no 64-bit scalar splat path on RV32 — TODO
+; confirm) and uses vadd.vv.
+define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_v32i64:
+; RV32: # %bb.0:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT: vmv1r.v v1, v0
+; RV32-NEXT: vslidedown.vi v0, v0, 2
+; RV32-NEXT: addi a2, zero, 32
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
+; RV32-NEXT: addi a2, a0, -16
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: bltu a0, a2, .LBB106_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: .LBB106_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT: addi a1, zero, 16
+; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
+; RV32-NEXT: bltu a0, a1, .LBB106_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: addi a0, zero, 16
+; RV32-NEXT: .LBB106_4:
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_v32i64:
+; RV64: # %bb.0:
+; RV64-NEXT: mv a1, zero
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT: addi a2, a0, -16
+; RV64-NEXT: vmv1r.v v25, v0
+; RV64-NEXT: vslidedown.vi v0, v0, 2
+; RV64-NEXT: bltu a0, a2, .LBB106_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, a2
+; RV64-NEXT: .LBB106_2:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT: addi a1, zero, 16
+; RV64-NEXT: vadd.vi v16, v16, -1, v0.t
+; RV64-NEXT: bltu a0, a1, .LBB106_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: addi a0, zero, 16
+; RV64-NEXT: .LBB106_4:
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu
+; RV64-NEXT: vmv1r.v v0, v25
+; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> undef, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ ret <32 x i64> %v
+}
+
+; Unmasked 32 x i64 variant: same 16/16 split, no mask manipulation; RV32
+; still materializes the -1 splat via e32 vmv.v.i, RV64 uses vadd.vi.
+define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vi_v32i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: mv a1, zero
+; RV32-NEXT: addi a2, zero, 32
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
+; RV32-NEXT: addi a2, a0, -16
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: bltu a0, a2, .LBB107_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: .LBB107_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT: addi a1, zero, 16
+; RV32-NEXT: vadd.vv v16, v16, v24
+; RV32-NEXT: bltu a0, a1, .LBB107_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: addi a0, zero, 16
+; RV32-NEXT: .LBB107_4:
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu
+; RV32-NEXT: vadd.vv v8, v8, v24
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vi_v32i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: mv a2, zero
+; RV64-NEXT: bltu a0, a1, .LBB107_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a2, a1
+; RV64-NEXT: .LBB107_2:
+; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT: addi a1, zero, 16
+; RV64-NEXT: vadd.vi v16, v16, -1
+; RV64-NEXT: bltu a0, a1, .LBB107_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: addi a0, zero, 16
+; RV64-NEXT: .LBB107_4:
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu
+; RV64-NEXT: vadd.vi v8, v8, -1
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> undef, <32 x i32> zeroinitializer
+ %head = insertelement <32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <32 x i1> %head, <32 x i1> undef, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ ret <32 x i64> %v
+}
+
+; FIXME: After splitting, the "high" vadd.vv is doing nothing; could be
+; replaced by undef.
+
+; Constant %evl of 12: low half runs at VL=12, high half at VL=0 (dead work,
+; see the FIXME comment preceding this test).
+define <32 x i64> @vadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vadd_vx_v32i64_evl12:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT: vslidedown.vi v1, v0, 2
+; RV32-NEXT: addi a0, zero, 32
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, mu
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, mu
+; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, mu
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_v32i64_evl12:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT: vslidedown.vi v25, v0, 2
+; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, mu
+; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
+; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, mu
+; RV64-NEXT: vmv1r.v v0, v25
+; RV64-NEXT: vadd.vi v16, v16, -1, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> undef, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12)
+ ret <32 x i64> %v
+}
+
+; Constant %evl of 27: low half runs at VL=16, high half at VL=11 (16+11=27);
+; both halves do real work, no clamping branches needed.
+define <32 x i64> @vadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vadd_vx_v32i64_evl27:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT: vslidedown.vi v1, v0, 2
+; RV32-NEXT: addi a0, zero, 32
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, mu
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, mu
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_v32i64_evl27:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT: vslidedown.vi v25, v0, 2
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
+; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, mu
+; RV64-NEXT: vmv1r.v v0, v25
+; RV64-NEXT: vadd.vi v16, v16, -1, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> undef, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27)
+ ret <32 x i64> %v
+}
ret <vscale x 64 x i8> %v
}
+; Test that split-legalization works when the mask itself needs splitting.
+
+declare <vscale x 128 x i8> @llvm.vp.add.nxv128i8(<vscale x 128 x i8>, <vscale x 128 x i8>, <vscale x 128 x i1>, i32)
+
+; Scalable nxv128i8: split point is vlenb*8 (a2 = vlenb << 3); low half runs
+; at min(%evl, vlenb*8), high half at max(%evl - vlenb*8, 0). The second mask
+; half is loaded with vle1.v from (a0) — presumably the nxv128i1 mask argument
+; is passed indirectly; NOTE(review): confirm against the RVV calling
+; convention.
+define <vscale x 128 x i8> @vadd_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv128i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: bltu a1, a2, .LBB48_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: .LBB48_2:
+; CHECK-NEXT: mv a4, zero
+; CHECK-NEXT: vsetvli a5, zero, e8, m8, ta, mu
+; CHECK-NEXT: vle1.v v25, (a0)
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, mu
+; CHECK-NEXT: sub a0, a1, a2
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: bltu a1, a0, .LBB48_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a4, a0
+; CHECK-NEXT: .LBB48_4:
+; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 128 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> undef, <vscale x 128 x i32> zeroinitializer
+ %v = call <vscale x 128 x i8> @llvm.vp.add.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+ ret <vscale x 128 x i8> %v
+}
+
+; Unmasked nxv128i8 variant: same vlenb*8 split and EVL clamping, with
+; unmasked vadd.vi and no mask load.
+define <vscale x 128 x i8> @vadd_vi_nxv128i8_unmasked(<vscale x 128 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv128i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: bltu a0, a1, .LBB49_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: .LBB49_2:
+; CHECK-NEXT: mv a3, zero
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: bltu a0, a1, .LBB49_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: .LBB49_4:
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, mu
+; CHECK-NEXT: vadd.vi v16, v16, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 128 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> undef, <vscale x 128 x i32> zeroinitializer
+ %head = insertelement <vscale x 128 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> undef, <vscale x 128 x i32> zeroinitializer
+ %v = call <vscale x 128 x i8> @llvm.vp.add.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+ ret <vscale x 128 x i8> %v
+}
+
declare <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
define <vscale x 1 x i16> @vadd_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
ret <vscale x 16 x i32> %v
}
+; Test that split-legalization works when the mask needs manual splitting.
+
+declare <vscale x 32 x i32> @llvm.vp.add.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, <vscale x 32 x i1>, i32)
+
+; nxv32i32 split at vlenb*2 elements (nxv16i32 per half); the nxv32i1 mask
+; fits in one register, so its high half is produced in-register with
+; vslidedown.vx by vlenb/4 (srli a4, a1, 2) rather than loaded from memory.
+define <vscale x 32 x i32> @vadd_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, mu
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: vmv1r.v v25, v0
+; CHECK-NEXT: vslidedown.vx v0, v0, a4
+; CHECK-NEXT: bltu a0, a3, .LBB116_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: .LBB116_2:
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
+; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: bltu a0, a1, .LBB116_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB116_4:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i32> @llvm.vp.add.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i32> %v
+}
+
+; FIXME: We don't catch this as unmasked.
+
+; All-true mask is not recognized as unmasked here (see FIXME above): an
+; all-ones mask is built with vmset.m and then split/applied like a real one,
+; so both vadd.vi instructions still carry v0.t.
+define <vscale x 32 x i32> @vadd_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv32i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a4, a1, 2
+; CHECK-NEXT: vsetvli a3, zero, e8, m4, ta, mu
+; CHECK-NEXT: vmset.m v25
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, mu
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: vmv1r.v v26, v25
+; CHECK-NEXT: vslidedown.vx v0, v25, a4
+; CHECK-NEXT: bltu a0, a3, .LBB117_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: .LBB117_2:
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
+; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: bltu a0, a1, .LBB117_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB117_4:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v26
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> undef, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i32> @llvm.vp.add.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i32> %v
+}
+
+; Test splitting when the %evl is a constant (albeit an unknown one).
+
+declare i32 @llvm.vscale.i32()
+
+; FIXME: The upper half of the operation is doing nothing.
+; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
+
+; %evl = vscale * 8, i.e. a known multiple of vlenb, yet the runtime clamp
+; branches comparing vscale-derived quantities are still emitted (see the
+; FIXMEs above): the high half ends up with a dead VL.
+define <vscale x 32 x i32> @vadd_vi_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m) {
+; CHECK-LABEL: vadd_vi_nxv32i32_evl_nx8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mv a2, zero
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a4, a0, 2
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: vmv1r.v v25, v0
+; CHECK-NEXT: vslidedown.vx v0, v0, a4
+; CHECK-NEXT: bltu a0, a3, .LBB118_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: .LBB118_2:
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
+; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: bltu a0, a1, .LBB118_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB118_4:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> undef, <vscale x 32 x i32> zeroinitializer
+ %evl = call i32 @llvm.vscale.i32()
+ %evl0 = mul i32 %evl, 8
+ %v = call <vscale x 32 x i32> @llvm.vp.add.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl0)
+ ret <vscale x 32 x i32> %v
+}
+
+; FIXME: The first vadd.vi should be able to infer that its AVL is equivalent to VLMAX.
+; FIXME: The upper half of the operation is doing nothing.
+
+; %evl = vscale * 16, exactly the element count of the low nxv16i32 half: the
+; low half runs with AVL = vlenb*2 (not yet folded to VLMAX, see FIXME above)
+; and the high half runs at VL=0, i.e. it does nothing.
+define <vscale x 32 x i32> @vadd_vi_nxv32i32_evl_nx16(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m) {
+; CHECK-LABEL: vadd_vi_nxv32i32_evl_nx16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vslidedown.vx v25, v0, a1
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: vsetivli zero, 0, e32, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> undef, <vscale x 32 x i32> zeroinitializer
+ %evl = call i32 @llvm.vscale.i32()
+ %evl0 = mul i32 %evl, 16
+ %v = call <vscale x 32 x i32> @llvm.vp.add.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl0)
+ ret <vscale x 32 x i32> %v
+}
+
declare <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
define <vscale x 1 x i64> @vadd_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {