From 2b24e7b5f7bb316317ae4f92347516087348aceb Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 31 Mar 2023 19:47:08 +0100 Subject: [PATCH] [RISCV] Use tail agnostic policy more often when lowering insert_subvector If we're inserting a fixed length subvector into a fixed length vector, then we can use a tail agnostic policy as long as we're inserting up to or past the end of the main vector. I.e., because we're overwriting all of the main vector's tail elements, and we don't care what the elements after that are. As noted by Philip in https://reviews.llvm.org/D146711#4220341 Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D147347 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 9 ++--- llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll | 6 +-- .../CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll | 12 +++--- .../RISCV/rvv/fixed-vectors-extload-truncstore.ll | 46 +++++++++++----------- .../CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll | 16 +++----- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll | 4 +- .../RISCV/rvv/fixed-vectors-insert-subvector.ll | 9 +---- .../test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll | 2 +- .../RISCV/rvv/fixed-vectors-int-exttrunc.ll | 2 +- .../RISCV/rvv/fixed-vectors-masked-gather.ll | 4 +- .../RISCV/rvv/fixed-vectors-reduction-int-vp.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll | 6 +-- .../RISCV/rvv/fixed-vectors-setcc-int-vp.ll | 6 +-- .../CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll | 5 +-- llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll | 14 +++---- .../CodeGen/RISCV/rvv/vector-interleave-fixed.ll | 40 +++++++++---------- 18 files changed, 91 insertions(+), 102 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 9f17813..de1fd44 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -6660,14 +6660,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; // Set the vector length to only the number of elements we care about. Note // that for slideup this includes the offset. - SDValue VL = - getVLOp(OrigIdx + SubVecVT.getVectorNumElements(), DL, DAG, Subtarget); + unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements(); + SDValue VL = getVLOp(EndIndex, DL, DAG, Subtarget); SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT); - // Use tail agnostic policy if OrigIdx is the last index of Vec. + // Use tail agnostic policy if we're inserting over Vec's tail. 
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; - if (VecVT.isFixedLengthVector() && - OrigIdx + 1 == VecVT.getVectorNumElements()) + if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements()) Policy = RISCVII::TAIL_AGNOSTIC; SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec, SlideupAmt, Mask, VL, Policy); diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll index 0174ca0..fe45772 100644 --- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll @@ -112,7 +112,7 @@ define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vsaddu.vx v8, v8, a1 ; CHECK-NEXT: vmsltu.vx v0, v8, a2 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: ret %mask = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 %index, i64 %tc) @@ -147,7 +147,7 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vsaddu.vx v8, v8, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v0, v16, 6 ; CHECK-NEXT: ret %mask = call <64 x i1> @llvm.get.active.lane.mask.v64i1.i64(i64 %index, i64 %tc) @@ -214,7 +214,7 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vsaddu.vx v8, v8, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vslideup.vi v0, v16, 14 ; CHECK-NEXT: ret %mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll index a311973..34194e6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll @@ -112,9 +112,9 @@ define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32 ; CHECK-NEXT: vmv1r.v v0, v28 ; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vslideup.vi v16, v24, 16 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %v = call <32 x float> @llvm.vp.fptrunc.v32f64.v32f32(<32 x double> %a, <32 x i1> %m, i32 %vl) ret <32 x float> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll index e697cb7..f628ac9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll @@ -71,9 +71,9 @@ define <128 x i7> @vtrunc_v128i7_v128i16(<128 x i16> %a, <128 x i1> %m, i32 zero ; CHECK-NEXT: vmv1r.v v0, v28 ; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vslideup.vx v16, v24, a1 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %v = call <128 x i7> @llvm.vp.trunc.v128i7.v128i16(<128 x i16> %a, <128 x i1> %m, i32 %vl) ret <128 x i7> %v @@ -376,7 +376,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: # 
%bb.11: ; CHECK-NEXT: li a7, 32 ; CHECK-NEXT: .LBB16_12: -; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a4, 48 ; CHECK-NEXT: mul a1, a1, a4 @@ -446,7 +446,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t -; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vslideup.vi v16, v8, 16 ; CHECK-NEXT: vse32.v v16, (a0) ; CHECK-NEXT: addi a1, a0, 256 @@ -505,9 +505,9 @@ define <32 x i32> @vtrunc_v32i32_v32i64(<32 x i64> %a, <32 x i1> %m, i32 zeroext ; CHECK-NEXT: vmv1r.v v0, v28 ; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vslideup.vi v16, v24, 16 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %v = call <32 x i32> @llvm.vp.trunc.v32i32.v32i64(<32 x i64> %a, <32 x i1> %m, i32 %vl) ret <32 x i32> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll index c0f4778..7edbdbdf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll @@ -814,7 +814,7 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %z) { ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 8 ; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret @@ -1123,7 +1123,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, ptr %z) { ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 ; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret @@ -1147,7 +1147,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, ptr %z) { ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 ; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret @@ -1246,7 +1246,7 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %z) { ; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 12 ; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret @@ -1270,12 +1270,12 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, ptr %z) { ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, 
v11, 0 ; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v10, v9, 4 ; LMULMAX1-NEXT: addi a1, a0, 16 ; LMULMAX1-NEXT: vse16.v v10, (a1) @@ -1446,7 +1446,7 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, ptr %z) { ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 ; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret @@ -1477,7 +1477,7 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, ptr %z) { ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 ; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret @@ -1501,7 +1501,7 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, ptr %z) { ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 ; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: ret @@ -1548,7 +1548,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %z) { ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 ; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret @@ -1591,7 +1591,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, ptr %z) { ; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 ; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret @@ -1615,12 +1615,12 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, ptr %z) { ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 ; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v10, v9, 2 ; LMULMAX1-NEXT: addi a1, a0, 16 ; LMULMAX1-NEXT: vse32.v v10, (a1) @@ -1701,7 +1701,7 @@ define void @truncstore_v16i64_v16i8(<16 x i64> %x, ptr %z) { ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 14 ; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret @@ -1720,7 +1720,7 @@ define void @truncstore_v16i64_v16i8(<16 x i64> %x, ptr %z) { ; LMULMAX4-NEXT: vnsrl.wi v8, v14, 0 ; LMULMAX4-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; 
LMULMAX4-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, tu, ma +; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX4-NEXT: vslideup.vi v8, v12, 8 ; LMULMAX4-NEXT: vse8.v v8, (a0) ; LMULMAX4-NEXT: ret @@ -1752,7 +1752,7 @@ define void @truncstore_v16i64_v16i16(<16 x i64> %x, ptr %z) { ; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v13, 0 @@ -1774,7 +1774,7 @@ define void @truncstore_v16i64_v16i16(<16 x i64> %x, ptr %z) { ; LMULMAX1-NEXT: vnsrl.wi v9, v15, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v10, v9, 6 ; LMULMAX1-NEXT: addi a1, a0, 16 ; LMULMAX1-NEXT: vse16.v v10, (a1) @@ -1791,7 +1791,7 @@ define void @truncstore_v16i64_v16i16(<16 x i64> %x, ptr %z) { ; LMULMAX4-NEXT: vnsrl.wi v14, v8, 0 ; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; LMULMAX4-NEXT: vnsrl.wi v8, v14, 0 -; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, tu, ma +; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX4-NEXT: vslideup.vi v8, v12, 8 ; LMULMAX4-NEXT: vse16.v v8, (a0) ; LMULMAX4-NEXT: ret @@ -1806,22 +1806,22 @@ define void @truncstore_v16i64_v16i32(<16 x i64> %x, ptr %z) { ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 ; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v10, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v13, 0 ; LMULMAX1-NEXT: vnsrl.wi v11, v12, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v11, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v15, 0 ; LMULMAX1-NEXT: vnsrl.wi v12, v14, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v12, v9, 2 ; LMULMAX1-NEXT: addi a1, a0, 48 ; LMULMAX1-NEXT: vse32.v v12, (a1) @@ -1837,7 +1837,7 @@ define void @truncstore_v16i64_v16i32(<16 x i64> %x, ptr %z) { ; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX4-NEXT: vnsrl.wi v16, v12, 0 ; LMULMAX4-NEXT: vnsrl.wi v12, v8, 0 -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, tu, ma +; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; LMULMAX4-NEXT: vslideup.vi v12, v16, 8 ; LMULMAX4-NEXT: vse32.v v12, (a0) ; LMULMAX4-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll index 3a59274..b3e590c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll @@ -160,7 +160,7 @@ define void @fpround_v8f32_v8f16(ptr %x, ptr %y) { ; LMULMAX1-NEXT: vle32.v v9, (a2) ; LMULMAX1-NEXT: 
vfncvt.f.f.w v10, v8 ; LMULMAX1-NEXT: vfncvt.f.f.w v8, v9 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v10, v8, 4 ; LMULMAX1-NEXT: vse16.v v10, (a1) ; LMULMAX1-NEXT: ret @@ -210,7 +210,7 @@ define void @fpround_v8f64_v8f16(ptr %x, ptr %y) { ; LMULMAX1-NEXT: vfncvt.rod.f.f.w v10, v8 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; LMULMAX1-NEXT: vfncvt.f.f.w v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v9, v8, 6 ; LMULMAX1-NEXT: vse16.v v9, (a1) ; LMULMAX1-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll index 6a3e456..36e1fd9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -157,9 +157,8 @@ define <8 x i1> @fp2si_v8f32_v8i1(<8 x float> %x) { ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; LMULMAX1-NEXT: vmv.v.i v9, 0 ; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 ; LMULMAX1-NEXT: ret %z = fptosi <8 x float> %x to <8 x i1> @@ -191,9 +190,8 @@ define <8 x i1> @fp2ui_v8f32_v8i1(<8 x float> %x) { ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; LMULMAX1-NEXT: vmv.v.i v9, 0 ; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 ; LMULMAX1-NEXT: ret %z = fptoui <8 x float> %x to <8 x i1> @@ -472,7 +470,7 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; LMULMAX1-NEXT: vslideup.vi v9, v8, 6 ; LMULMAX1-NEXT: vse8.v v9, (a1) ; LMULMAX1-NEXT: ret @@ -532,7 +530,7 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; LMULMAX1-NEXT: vslideup.vi v9, v8, 6 ; LMULMAX1-NEXT: vse8.v v9, (a1) ; LMULMAX1-NEXT: ret @@ -589,9 +587,8 @@ define <8 x i1> @fp2si_v8f64_v8i1(<8 x double> %x) { ; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 ; LMULMAX1-NEXT: ret %z = fptosi <8 x double> %x to <8 x i1> @@ -645,9 +642,8 @@ define <8 x i1> @fp2ui_v8f64_v8i1(<8 x double> %x) { ; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 -; LMULMAX1-NEXT: vsetvli 
zero, zero, e8, mf2, ta, ma ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 ; LMULMAX1-NEXT: ret %z = fptoui <8 x double> %x to <8 x i1> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll index 462d2ec..18fcac4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll @@ -514,7 +514,7 @@ define void @si2fp_v8i64_v8f16(ptr %x, ptr %y) { ; LMULMAX1-NEXT: vfncvt.f.x.w v10, v8 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; LMULMAX1-NEXT: vfncvt.f.f.w v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v9, v8, 6 ; LMULMAX1-NEXT: vse16.v v9, (a1) ; LMULMAX1-NEXT: ret @@ -564,7 +564,7 @@ define void @ui2fp_v8i64_v8f16(ptr %x, ptr %y) { ; LMULMAX1-NEXT: vfncvt.f.xu.w v10, v8 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; LMULMAX1-NEXT: vfncvt.f.f.w v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vslideup.vi v9, v8, 6 ; LMULMAX1-NEXT: vse16.v v9, (a1) ; LMULMAX1-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index 7a66295..3ef2588 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -135,7 +135,6 @@ define void @insert_v4i32_v2i32_2(ptr %vp, ptr %svp) { ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v9, v8, 2 ; CHECK-NEXT: vse32.v v9, (a0) ; CHECK-NEXT: ret @@ -210,7 +209,6 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) { ; LMULMAX1-NEXT: vle32.v v8, (a1) ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; LMULMAX1-NEXT: vslideup.vi v9, v8, 2 ; LMULMAX1-NEXT: vse32.v v9, (a0) ; LMULMAX1-NEXT: ret @@ -228,7 +226,6 @@ define void @insert_v8i32_v2i32_6(ptr %vp, ptr %svp) { ; LMULMAX2-NEXT: vle32.v v8, (a1) ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vle32.v v10, (a0) -; LMULMAX2-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; LMULMAX2-NEXT: vslideup.vi v10, v8, 6 ; LMULMAX2-NEXT: vse32.v v10, (a0) ; LMULMAX2-NEXT: ret @@ -240,7 +237,6 @@ define void @insert_v8i32_v2i32_6(ptr %vp, ptr %svp) { ; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; LMULMAX1-NEXT: vslideup.vi v9, v8, 2 ; LMULMAX1-NEXT: vse32.v v9, (a0) ; LMULMAX1-NEXT: ret @@ -302,7 +298,7 @@ define void @insert_v4i16_v2i16_2(ptr %vp, ptr %svp) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 2 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret @@ -419,9 +415,8 @@ define void @insert_v8i1_v4i1_4(ptr %vp, ptr %svp) { ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v9, v8, 4 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, 
ma ; CHECK-NEXT: vmsne.vi v8, v9, 0 ; CHECK-NEXT: vsm.v v8, (a0) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll index 416095e..43dfd63 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -47,7 +47,7 @@ define void @insertelt_v3i64(ptr %x, i64 %y) { ; RV32-NEXT: vlse32.v v10, (a4), zero ; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; RV32-NEXT: vmv.s.x v10, a3 -; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV32-NEXT: vslideup.vi v8, v10, 2 ; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v10, v8, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll index e007f84..d3c843c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll @@ -208,7 +208,7 @@ define void @trunc_v8i8_v8i32(ptr %x, ptr %z) { ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 ; LMULMAX1-NEXT: vse8.v v8, (a1) ; LMULMAX1-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 26411ec..293e5e2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -12574,9 +12574,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t ; RV64V-NEXT: li a0, 32 -; RV64V-NEXT: vsetvli zero, a0, e8, m2, tu, ma +; RV64V-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; RV64V-NEXT: vslideup.vi v12, v10, 16 -; RV64V-NEXT: vmv2r.v v8, v12 +; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_baseidx_v32i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll index fd9e009..908026a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -1805,7 +1805,7 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, ; RV32-NEXT: vid.v v24 ; RV32-NEXT: vmsltu.vx v12, v24, a1 ; RV32-NEXT: vmsltu.vx v13, v16, a1 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vslideup.vi v12, v13, 4 ; RV32-NEXT: li a0, 64 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma @@ -1848,7 +1848,7 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, ; RV64-NEXT: vid.v v24 ; RV64-NEXT: vmsltu.vx v12, v24, a1 ; RV64-NEXT: vmsltu.vx v13, v16, a1 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vslideup.vi v12, v13, 4 ; RV64-NEXT: li a0, 64 ; RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll index 8f6bd0c..6bdb1a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -584,9 +584,9 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t -; CHECK-NEXT: vsetivli zero, 16, e8, m1, tu, ma +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vslideup.vi v16, v1, 8 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmv.v.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 @@ -1195,7 +1195,7 @@ define <32 x i1> @fcmp_oeq_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vslideup.vi v16, v1, 2 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll index 1294c3f..fa91394 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll @@ -1358,7 +1358,7 @@ define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v16, v1, 4 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb @@ -1390,7 +1390,7 @@ define <64 x i1> @icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 ze ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v16, v25, 4 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -1420,7 +1420,7 @@ define <64 x i1> @icmp_eq_vx_swap_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v16, v25, 4 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll index 8417193..30be917 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -309,7 +309,7 @@ define <32 x i8> @vpgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> % ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsetvli zero, a0, e8, m2, tu, ma +; RV64-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; RV64-NEXT: vslideup.vi v8, v12, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs @@ -2512,9 +2512,8 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x ; RV32-NEXT: vnsrl.wi v24, v16, 0 ; RV32-NEXT: vnsrl.wi v16, v8, 0 ; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, tu, ma +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vslideup.vi v16, v24, 16 -; RV32-NEXT: 
vsetvli zero, zero, e32, m8, ta, ma ; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsll.vi v16, v16, 3 ; RV32-NEXT: mv a2, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll index dcf1a05..4a6f52a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -26,7 +26,7 @@ define <4 x i8> @v2i8_2(<2 x i8> %a, <2 x i8> %b) { ; CHECK-NEXT: vslidedown.vi v8, v9, 1 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: ret %v4i8 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> @@ -176,7 +176,7 @@ define <4 x i16> @v2i16_2(<2 x i16> %a, <2 x i16> %b) { ; CHECK-NEXT: vslidedown.vi v8, v9, 1 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: ret %v4i16 = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> @@ -325,7 +325,7 @@ define <4 x i32> @v2i32_2(<2 x i32> %a, < 2 x i32> %b) { ; CHECK-NEXT: vslidedown.vi v8, v9, 1 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: ret %v4i32 = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> @@ -475,7 +475,7 @@ define <4 x i64> @v2i64_2(<2 x i64> %a, < 2 x i64> %b) { ; CHECK-NEXT: vslidedown.vi v8, v9, 1 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: ret %v4i64 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> @@ -564,7 +564,7 @@ define <4 x half> @v2f16_2(<2 x half> %a, <2 x half> %b) { ; CHECK-NEXT: vslidedown.vi v8, v9, 1 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: ret %v4f16 = shufflevector <2 x half> %a, <2 x half> %b, <4 x i32> @@ -687,7 +687,7 @@ define <4 x float> @v2f32_2(<2 x float> %a, <2 x float> %b) { ; CHECK-NEXT: vslidedown.vi v8, v9, 1 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: ret %v4f32 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> @@ -780,7 +780,7 @@ define <4 x double> @v2f64_2(<2 x double> %a, < 2 x double> %b) { ; CHECK-NEXT: vslidedown.vi v8, v9, 1 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: ret %v4f64 = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll index 914782e..aa34095 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll 
@@ -283,33 +283,33 @@ define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) { define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) { ; RV32-LABEL: vector_interleave_v4i64_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v10, v9 +; RV32-NEXT: vmv1r.v v10, v8 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vmv.v.i v12, 0 ; RV32-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; RV32-NEXT: vslideup.vi v12, v8, 0 -; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, ma -; RV32-NEXT: vslideup.vi v12, v10, 2 +; RV32-NEXT: vslideup.vi v12, v10, 0 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: lui a0, %hi(.LCPI3_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI3_0) -; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV32-NEXT: vle16.v v10, (a0) +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: vslideup.vi v12, v8, 2 ; RV32-NEXT: vrgatherei16.vv v8, v12, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: vector_interleave_v4i64_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v10, v9 +; RV64-NEXT: vmv1r.v v10, v8 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vmv.v.i v12, 0 ; RV64-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; RV64-NEXT: vslideup.vi v12, v8, 0 -; RV64-NEXT: vsetivli zero, 4, e64, m2, tu, ma -; RV64-NEXT: vslideup.vi v12, v10, 2 +; RV64-NEXT: vslideup.vi v12, v10, 0 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: lui a0, %hi(.LCPI3_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI3_0) -; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV64-NEXT: vle64.v v10, (a0) +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: vslideup.vi v12, v8, 2 ; RV64-NEXT: vrgather.vv v8, v12, v10 ; RV64-NEXT: ret %res = call <4 x i64> @llvm.experimental.vector.interleave2.v4i64(<2 x i64> %a, <2 x i64> %b) @@ -391,33 +391,33 @@ define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double> %b) { ; RV32-LABEL: vector_interleave_v4f64_v2f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v10, v9 +; RV32-NEXT: vmv1r.v v10, v8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vmv.v.i v12, 0 ; RV32-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; RV32-NEXT: vslideup.vi v12, v8, 0 -; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, ma -; RV32-NEXT: vslideup.vi v12, v10, 2 +; RV32-NEXT: vslideup.vi v12, v10, 0 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: lui a0, %hi(.LCPI9_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI9_0) -; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV32-NEXT: vle16.v v10, (a0) +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: vslideup.vi v12, v8, 2 ; RV32-NEXT: vrgatherei16.vv v8, v12, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: vector_interleave_v4f64_v2f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v10, v9 +; RV64-NEXT: vmv1r.v v10, v8 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vmv.v.i v12, 0 ; RV64-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; RV64-NEXT: vslideup.vi v12, v8, 0 -; RV64-NEXT: vsetivli zero, 4, e64, m2, tu, ma -; RV64-NEXT: vslideup.vi v12, v10, 2 +; RV64-NEXT: vslideup.vi v12, v10, 0 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: lui a0, %hi(.LCPI9_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI9_0) -; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV64-NEXT: vle64.v v10, (a0) +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: vslideup.vi v12, v8, 2 ; RV64-NEXT: vrgather.vv v8, v12, v10 ; RV64-NEXT: ret %res = call <4 x double> @llvm.experimental.vector.interleave2.v4f64(<2 x double> %a, <2 x double> %b) -- 2.7.4
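
A minimal standalone C++ sketch of the policy decision the RISCVISelLowering.cpp hunk above implements, for reasoning about it outside the LLVM sources. The TailPolicy enum and chooseSlideupTailPolicy helper are illustrative stand-ins, not LLVM APIs; the real lowering uses RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED / RISCVII::TAIL_AGNOSTIC and getVSlideup as shown in the diff. The idea: the vslideup that lowers insert_subvector runs with VL = OrigIdx + NumSubElts, so when the destination is a fixed-length vector and that VL equals its element count, the tail consists only of container elements beyond the fixed-length value, whose contents don't matter, and tail agnostic is safe.

#include <cassert>

// Illustrative stand-in for the RVV tail policy flags.
enum class TailPolicy { Undisturbed, Agnostic };

// Mirror of the condition added by this patch: once the slideup's VL
// (EndIndex) reaches the end of a fixed-length destination, there are no
// live tail elements left to preserve.
TailPolicy chooseSlideupTailPolicy(unsigned OrigIdx, unsigned NumSubElts,
                                   unsigned NumVecElts,
                                   bool VecIsFixedLength) {
  unsigned EndIndex = OrigIdx + NumSubElts; // also the VL of the slideup
  assert((!VecIsFixedLength || EndIndex <= NumVecElts) &&
         "a fixed-length subvector insert must fit in the destination");
  if (VecIsFixedLength && EndIndex == NumVecElts)
    return TailPolicy::Agnostic;    // inserting over the whole tail
  return TailPolicy::Undisturbed;   // elements >= EndIndex must stay live
}

In the tests above this matches the diff: truncstore_v16i16_v16i8 slides 8 elements up to index 8 of a 16-element result, so EndIndex == 16 and its vsetivli flips from tu to ta, while vector_interleave_v4i64_v2i64 still slides 2 elements into element 0 of a 4-element destination (EndIndex == 2) and keeps tu for that first slideup.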