From 9c410838d2bcaa087e867e9f8a353fe455588287 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 17 Jan 2022 14:31:28 -0800
Subject: [PATCH] [RISCV] Legalize fixed length (insert_subvector undef, X, 0)
 to a scalable insert.

We were considering this legal, but later the undef would become an
all-zeros vector. This would force us to re-legalize the insert later
into a vslideup with a zero vector.

This patch catches the case and directly legalizes it to a scalable
insert.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D117377
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp        |   6 +-
 .../RISCV/rvv/fixed-vectors-extload-truncstore.ll  | 361 ++++++++-------------
 .../CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll     |  37 +--
 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll  | 192 +++++------
 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll  |  44 +--
 .../RISCV/rvv/fixed-vectors-insert-subvector.ll    |   6 +-
 .../test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll |  20 +-
 .../RISCV/rvv/fixed-vectors-int-exttrunc.ll        |  14 +-
 .../RISCV/rvv/fixed-vectors-masked-gather.ll       |  24 +-
 9 files changed, 269 insertions(+), 435 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e6d855b..5dd10a4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4825,7 +4825,7 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
   // register size. Therefore we must slide the vector group up the full
   // amount.
   if (SubVecVT.isFixedLengthVector()) {
-    if (OrigIdx == 0 && Vec.isUndef())
+    if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
       return Op;
     MVT ContainerVT = VecVT;
     if (VecVT.isFixedLengthVector()) {
@@ -4835,6 +4835,10 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
     SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SubVec,
                          DAG.getConstant(0, DL, XLenVT));
+    if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
+      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
+      return DAG.getBitcast(Op.getValueType(), SubVec);
+    }
     SDValue Mask =
         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
     // Set the vector length to only the number of elements we care about.
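
Note on the test updates that follow: the pattern this change targets is a
fixed-length (insert_subvector undef, X, 0). A minimal IR sketch, adapted from
the insert_v4i32_undef_v2i32_0 test in fixed-vectors-insert-subvector.ll below
(the function name here is illustrative, not part of the patch):

  define <4 x i32> @insert_undef_lo(<2 x i32>* %svp) {
    %sv = load <2 x i32>, <2 x i32>* %svp
    %v = call <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32> undef, <2 x i32> %sv, i64 0)
    ret <4 x i32> %v
  }
  declare <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32>, <2 x i32>, i64)

Before this change the subvector was slid into an all-zeros splat (vmv.v.i
followed by vslideup.vi); with the new block above it is wrapped in a scalable
INSERT_SUBVECTOR at index 0 and converted straight back to the fixed-length
type, so in that test the loaded value is simply stored as-is. The new block
only fires when VecVT is a fixed-length vector, which is why the existing
early return now excludes that case.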
Note diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll index e230129..063d4f3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll @@ -847,16 +847,11 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %z) { ; LMULMAX1-LABEL: truncstore_v16i16_v16i8: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 8 -; LMULMAX1-NEXT: vse8.v v10, (a0) +; LMULMAX1-NEXT: vslideup.vi v8, v9, 8 +; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: truncstore_v16i16_v16i8: @@ -1162,20 +1157,16 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %z) { ; LMULMAX1-LABEL: truncstore_v8i32_v8i8: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 +; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmv.v.i v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 4 -; LMULMAX1-NEXT: vse8.v v10, (a0) +; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 +; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: truncstore_v8i32_v8i8: @@ -1195,16 +1186,11 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %z) { ; LMULMAX1-LABEL: truncstore_v8i32_v8i16: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 4 -; LMULMAX1-NEXT: vse16.v v10, (a0) +; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 +; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: truncstore_v8i32_v8i16: @@ -1284,32 +1270,28 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %z) { ; LMULMAX1-LABEL: truncstore_v16i32_v16i8: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 +; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; 
LMULMAX1-NEXT: vmv.v.i v12, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 4 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 12, e8, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 8 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 8 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 12 -; LMULMAX1-NEXT: vse8.v v12, (a0) +; LMULMAX1-NEXT: vslideup.vi v8, v9, 12 +; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: truncstore_v16i32_v16i8: @@ -1329,27 +1311,18 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %z) { ; LMULMAX1-LABEL: truncstore_v16i32_v16i16: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v12, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu -; LMULMAX1-NEXT: vmv1r.v v13, v12 -; LMULMAX1-NEXT: vslideup.vi v13, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v13, v8, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 0 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 +; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 4 +; LMULMAX1-NEXT: vslideup.vi v10, v9, 4 ; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse16.v v12, (a1) -; LMULMAX1-NEXT: vse16.v v13, (a0) +; LMULMAX1-NEXT: vse16.v v10, (a1) +; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: truncstore_v16i32_v16i16: @@ -1505,24 +1478,20 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, <4 x i8>* %z) { ; LMULMAX1-LABEL: truncstore_v4i64_v4i8: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 +; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, mu -; LMULMAX1-NEXT: vmv.v.i v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 0 
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 2 -; LMULMAX1-NEXT: vse8.v v10, (a0) +; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 +; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: truncstore_v4i64_v4i8: @@ -1544,20 +1513,16 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %z) { ; LMULMAX1-LABEL: truncstore_v4i64_v4i16: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 +; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vmv.v.i v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 2 -; LMULMAX1-NEXT: vse16.v v10, (a0) +; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 +; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: truncstore_v4i64_v4i16: @@ -1577,16 +1542,11 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, <4 x i32>* %z) { ; LMULMAX1-LABEL: truncstore_v4i64_v4i32: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 2 -; LMULMAX1-NEXT: vse32.v v10, (a0) +; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 +; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: truncstore_v4i64_v4i32: @@ -1604,40 +1564,36 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %z) { ; LMULMAX1-LABEL: truncstore_v8i64_v8i8: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 +; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmv.v.i v12, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi 
v12, v8, 2 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 4 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 6 -; LMULMAX1-NEXT: vse8.v v12, (a0) +; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 +; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: truncstore_v8i64_v8i8: @@ -1659,32 +1615,28 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %z) { ; LMULMAX1-LABEL: truncstore_v8i64_v8i16: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 +; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v12, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 2 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 4 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 6 -; LMULMAX1-NEXT: vse16.v v12, (a0) +; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 +; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: truncstore_v8i64_v8i16: @@ -1704,27 +1656,18 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %z) { ; LMULMAX1-LABEL: truncstore_v8i64_v8i32: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v12, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu -; LMULMAX1-NEXT: vmv1r.v v13, v12 -; LMULMAX1-NEXT: 
vslideup.vi v13, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v13, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 0 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 +; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 2 +; LMULMAX1-NEXT: vslideup.vi v10, v9, 2 ; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse32.v v12, (a1) -; LMULMAX1-NEXT: vse32.v v13, (a0) +; LMULMAX1-NEXT: vse32.v v10, (a1) +; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: truncstore_v8i64_v8i32: @@ -1742,21 +1685,17 @@ define void @truncstore_v16i64_v16i8(<16 x i64> %x, <16 x i8>* %z) { ; LMULMAX1-LABEL: truncstore_v16i64_v16i8: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v16, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v8, v16, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 +; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, m1, tu, mu ; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu @@ -1813,24 +1752,20 @@ define void @truncstore_v16i64_v16i8(<16 x i64> %x, <16 x i8>* %z) { ; LMULMAX4-LABEL: truncstore_v16i64_v16i8: ; LMULMAX4: # %bb.0: ; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX4-NEXT: vnsrl.wi v16, v8, 0 +; LMULMAX4-NEXT: vnsrl.wi v16, v12, 0 ; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX4-NEXT: vnsrl.wi v8, v16, 0 +; LMULMAX4-NEXT: vnsrl.wi v12, v16, 0 ; LMULMAX4-NEXT: vsetvli zero, zero, e8, mf2, ta, mu -; LMULMAX4-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; LMULMAX4-NEXT: vmv.v.i v9, 0 -; LMULMAX4-NEXT: vsetivli zero, 8, e8, m1, tu, mu -; LMULMAX4-NEXT: vslideup.vi v9, v8, 0 -; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX4-NEXT: vnsrl.wi v10, v12, 0 +; LMULMAX4-NEXT: vnsrl.wi v12, v12, 0 +; LMULMAX4-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; LMULMAX4-NEXT: vnsrl.wi v14, v8, 0 ; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX4-NEXT: vnsrl.wi v8, v10, 0 +; LMULMAX4-NEXT: vnsrl.wi v8, v14, 0 ; LMULMAX4-NEXT: vsetvli zero, zero, e8, mf2, ta, mu ; LMULMAX4-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, tu, mu -; LMULMAX4-NEXT: vslideup.vi v9, v8, 8 -; LMULMAX4-NEXT: vse8.v v9, (a0) +; LMULMAX4-NEXT: vslideup.vi v8, v12, 8 +; 
LMULMAX4-NEXT: vse8.v v8, (a0) ; LMULMAX4-NEXT: ret %y = trunc <16 x i64> %x to <16 x i8> store <16 x i8> %y, <16 x i8>* %z @@ -1841,59 +1776,52 @@ define void @truncstore_v16i64_v16i16(<16 x i64> %x, <16 x i16>* %z) { ; LMULMAX1-LABEL: truncstore_v16i64_v16i16: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 +; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v16, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu -; LMULMAX1-NEXT: vmv1r.v v17, v16 -; LMULMAX1-NEXT: vslideup.vi v17, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v17, v8, 2 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v17, v8, 4 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v17, v8, 6 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v12, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v13, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v16, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v13, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 +; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v10, v12, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v16, v8, 2 +; LMULMAX1-NEXT: vslideup.vi v10, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v14, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v14, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v16, v8, 4 +; LMULMAX1-NEXT: vslideup.vi v10, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v15, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v15, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v16, v8, 6 +; LMULMAX1-NEXT: vslideup.vi v10, v9, 6 ; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse16.v v16, (a1) -; LMULMAX1-NEXT: 
vse16.v v17, (a0) +; LMULMAX1-NEXT: vse16.v v10, (a1) +; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: truncstore_v16i64_v16i16: @@ -1906,13 +1834,9 @@ define void @truncstore_v16i64_v16i16(<16 x i64> %x, <16 x i16>* %z) { ; LMULMAX4-NEXT: vnsrl.wi v14, v8, 0 ; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; LMULMAX4-NEXT: vnsrl.wi v8, v14, 0 -; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; LMULMAX4-NEXT: vmv.v.i v10, 0 -; LMULMAX4-NEXT: vsetivli zero, 8, e16, m2, tu, mu -; LMULMAX4-NEXT: vslideup.vi v10, v8, 0 ; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, tu, mu -; LMULMAX4-NEXT: vslideup.vi v10, v12, 8 -; LMULMAX4-NEXT: vse16.v v10, (a0) +; LMULMAX4-NEXT: vslideup.vi v8, v12, 8 +; LMULMAX4-NEXT: vse16.v v8, (a0) ; LMULMAX4-NEXT: ret %y = trunc <16 x i64> %x to <16 x i16> store <16 x i16> %y, <16 x i16>* %z @@ -1923,49 +1847,32 @@ define void @truncstore_v16i64_v16i32(<16 x i64> %x, <16 x i32>* %z) { ; LMULMAX1-LABEL: truncstore_v16i64_v16i32: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v16, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu -; LMULMAX1-NEXT: vmv1r.v v17, v16 -; LMULMAX1-NEXT: vslideup.vi v17, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v17, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu -; LMULMAX1-NEXT: vmv1r.v v9, v16 -; LMULMAX1-NEXT: vslideup.vi v9, v8, 0 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 +; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v9, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v12, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu -; LMULMAX1-NEXT: vmv1r.v v10, v16 -; LMULMAX1-NEXT: vslideup.vi v10, v8, 0 +; LMULMAX1-NEXT: vslideup.vi v10, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v13, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v13, 0 +; LMULMAX1-NEXT: vnsrl.wi v11, v12, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 2 +; LMULMAX1-NEXT: vslideup.vi v11, v9, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v14, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v16, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v15, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v15, 0 +; LMULMAX1-NEXT: vnsrl.wi v12, v14, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v16, v8, 2 +; LMULMAX1-NEXT: vslideup.vi v12, v9, 2 ; LMULMAX1-NEXT: addi a1, a0, 48 -; LMULMAX1-NEXT: vse32.v v16, (a1) +; LMULMAX1-NEXT: vse32.v v12, (a1) ; LMULMAX1-NEXT: addi a1, a0, 32 -; LMULMAX1-NEXT: vse32.v v10, (a1) +; LMULMAX1-NEXT: vse32.v v11, (a1) ; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse32.v v9, (a1) -; LMULMAX1-NEXT: vse32.v v17, (a0) +; LMULMAX1-NEXT: vse32.v v10, (a1) +; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: ret ; ; LMULMAX4-LABEL: truncstore_v16i64_v16i32: @@ 
-1973,13 +1880,9 @@ define void @truncstore_v16i64_v16i32(<16 x i64> %x, <16 x i32>* %z) { ; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX4-NEXT: vnsrl.wi v16, v12, 0 ; LMULMAX4-NEXT: vnsrl.wi v12, v8, 0 -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; LMULMAX4-NEXT: vmv.v.i v8, 0 -; LMULMAX4-NEXT: vsetivli zero, 8, e32, m4, tu, mu -; LMULMAX4-NEXT: vslideup.vi v8, v12, 0 ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, tu, mu -; LMULMAX4-NEXT: vslideup.vi v8, v16, 8 -; LMULMAX4-NEXT: vse32.v v8, (a0) +; LMULMAX4-NEXT: vslideup.vi v12, v16, 8 +; LMULMAX4-NEXT: vse32.v v12, (a0) ; LMULMAX4-NEXT: ret %y = trunc <16 x i64> %x to <16 x i32> store <16 x i32> %y, <16 x i32>* %z diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll index 9eecc6e..5050af573 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll @@ -157,21 +157,16 @@ define void @fpround_v8f32_v8f16(<8 x float>* %x, <8 x half>* %y) { ; ; LMULMAX1-LABEL: fpround_v8f32_v8f16: ; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: addi a2, a0, 16 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v9, (a0) +; LMULMAX1-NEXT: vle32.v v9, (a2) ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; LMULMAX1-NEXT: vfncvt.f.f.w v10, v8 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v8, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.f.f.w v10, v9 +; LMULMAX1-NEXT: vfncvt.f.f.w v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v8, v10, 4 -; LMULMAX1-NEXT: vse16.v v8, (a1) +; LMULMAX1-NEXT: vslideup.vi v10, v8, 4 +; LMULMAX1-NEXT: vse16.v v10, (a1) ; LMULMAX1-NEXT: ret %a = load <8 x float>, <8 x float>* %x %d = fptrunc <8 x float> %a to <8 x half> @@ -205,29 +200,25 @@ define void @fpround_v8f64_v8f16(<8 x double>* %x, <8 x half>* %y) { ; LMULMAX1-NEXT: vfncvt.rod.f.f.w v12, v10 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; LMULMAX1-NEXT: vfncvt.f.f.w v10, v12 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v12, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.rod.f.f.w v10, v11 +; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; LMULMAX1-NEXT: vfncvt.rod.f.f.w v12, v11 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vfncvt.f.f.w v11, v10 +; LMULMAX1-NEXT: vfncvt.f.f.w v11, v12 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v11, 2 +; LMULMAX1-NEXT: vslideup.vi v10, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.rod.f.f.w v10, v9 +; LMULMAX1-NEXT: vfncvt.rod.f.f.w v11, v9 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vfncvt.f.f.w v9, v10 +; LMULMAX1-NEXT: vfncvt.f.f.w v9, v11 ; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v9, 4 +; LMULMAX1-NEXT: vslideup.vi v10, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX1-NEXT: vfncvt.rod.f.f.w v9, v8 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; LMULMAX1-NEXT: vfncvt.f.f.w v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, 
mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 6 -; LMULMAX1-NEXT: vse16.v v12, (a1) +; LMULMAX1-NEXT: vslideup.vi v10, v8, 6 +; LMULMAX1-NEXT: vse16.v v10, (a1) ; LMULMAX1-NEXT: ret %a = load <8 x double>, <8 x double>* %x %d = fptrunc <8 x double> %a to <8 x half> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll index d0f450f..f125dfd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -121,32 +121,24 @@ define <8 x i1> @fp2si_v8f32_v8i1(<8 x float> %x) { ; ; LMULMAX1-LABEL: fp2si_v8f32_v8i1: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmclr.m v0 -; LMULMAX1-NEXT: vmv.v.i v10, 0 -; LMULMAX1-NEXT: vmerge.vim v11, v10, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v12, v8 -; LMULMAX1-NEXT: vand.vi v8, v12, 1 +; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v8 +; LMULMAX1-NEXT: vand.vi v8, v10, 1 ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v11, v12, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmsne.vi v0, v11, 0 -; LMULMAX1-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX1-NEXT: vmv.v.i v8, 0 +; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v11, v9 -; LMULMAX1-NEXT: vand.vi v9, v11, 1 +; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v9 +; LMULMAX1-NEXT: vand.vi v9, v10, 1 ; LMULMAX1-NEXT: vmsne.vi v0, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX1-NEXT: vmv.v.i v9, 0 +; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 4 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 +; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 ; LMULMAX1-NEXT: ret %z = fptosi <8 x float> %x to <8 x i1> ret <8 x i1> %z @@ -163,32 +155,24 @@ define <8 x i1> @fp2ui_v8f32_v8i1(<8 x float> %x) { ; ; LMULMAX1-LABEL: fp2ui_v8f32_v8i1: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmclr.m v0 -; LMULMAX1-NEXT: vmv.v.i v10, 0 -; LMULMAX1-NEXT: vmerge.vim v11, v10, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v12, v8 -; LMULMAX1-NEXT: vand.vi v8, v12, 1 +; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; LMULMAX1-NEXT: vand.vi v8, v10, 1 ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v11, v12, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmsne.vi v0, v11, 0 -; LMULMAX1-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX1-NEXT: vmv.v.i v8, 0 +; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v11, v9 -; LMULMAX1-NEXT: vand.vi v9, v11, 1 +; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v9 +; LMULMAX1-NEXT: vand.vi v9, v10, 1 ; LMULMAX1-NEXT: vmsne.vi v0, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu -; LMULMAX1-NEXT: vmerge.vim 
v8, v8, 1, v0 +; LMULMAX1-NEXT: vmv.v.i v9, 0 +; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 4 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 +; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 ; LMULMAX1-NEXT: ret %z = fptoui <8 x float> %x to <8 x i1> ret <8 x i1> %z @@ -448,26 +432,22 @@ define void @fp2si_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) { ; LMULMAX1-NEXT: vnsrl.wi v10, v12, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmv.v.i v12, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v11 +; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v12, v11 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 +; LMULMAX1-NEXT: vnsrl.wi v11, v12, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 +; LMULMAX1-NEXT: vnsrl.wi v11, v11, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v10, 2 +; LMULMAX1-NEXT: vslideup.vi v10, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v9 +; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v11, v9 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v9, 4 +; LMULMAX1-NEXT: vslideup.vi v10, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v9, v8 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -475,8 +455,8 @@ define void @fp2si_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) { ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 6 -; LMULMAX1-NEXT: vse8.v v12, (a1) +; LMULMAX1-NEXT: vslideup.vi v10, v8, 6 +; LMULMAX1-NEXT: vse8.v v10, (a1) ; LMULMAX1-NEXT: ret %a = load <8 x double>, <8 x double>* %x %d = fptosi <8 x double> %a to <8 x i8> @@ -514,26 +494,22 @@ define void @fp2ui_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) { ; LMULMAX1-NEXT: vnsrl.wi v10, v12, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmv.v.i v12, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v11 +; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v12, v11 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 +; LMULMAX1-NEXT: vnsrl.wi v11, v12, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 +; LMULMAX1-NEXT: vnsrl.wi v11, v11, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v10, 2 +; LMULMAX1-NEXT: vslideup.vi v10, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, 
e32, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v9 +; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v11, v9 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v9, 4 +; LMULMAX1-NEXT: vslideup.vi v10, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v9, v8 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -541,8 +517,8 @@ define void @fp2ui_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) { ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 6 -; LMULMAX1-NEXT: vse8.v v12, (a1) +; LMULMAX1-NEXT: vslideup.vi v10, v8, 6 +; LMULMAX1-NEXT: vse8.v v10, (a1) ; LMULMAX1-NEXT: ret %a = load <8 x double>, <8 x double>* %x %d = fptoui <8 x double> %a to <8 x i8> @@ -561,54 +537,46 @@ define <8 x i1> @fp2si_v8f64_v8i1(<8 x double> %x) { ; ; LMULMAX1-LABEL: fp2si_v8f64_v8i1: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmclr.m v0 -; LMULMAX1-NEXT: vmv.v.i v12, 0 -; LMULMAX1-NEXT: vmerge.vim v13, v12, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v14, v8 -; LMULMAX1-NEXT: vand.vi v8, v14, 1 +; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v12, v8 +; LMULMAX1-NEXT: vand.vi v8, v12, 1 ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v14, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v13, v14, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmsne.vi v0, v13, 0 -; LMULMAX1-NEXT: vmerge.vim v13, v12, 1, v0 +; LMULMAX1-NEXT: vmv.v.i v8, 0 +; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v14, v9 -; LMULMAX1-NEXT: vand.vi v9, v14, 1 +; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v13, v9 +; LMULMAX1-NEXT: vand.vi v9, v13, 1 ; LMULMAX1-NEXT: vmsne.vi v0, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; LMULMAX1-NEXT: vmerge.vim v9, v8, 1, v0 +; LMULMAX1-NEXT: vmv.v.i v9, 0 +; LMULMAX1-NEXT: vmerge.vim v13, v9, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v13, v9, 2 +; LMULMAX1-NEXT: vslideup.vi v12, v13, 2 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmsne.vi v0, v13, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v12, 1, v0 +; LMULMAX1-NEXT: vmsne.vi v0, v12, 0 +; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v13, v10 ; LMULMAX1-NEXT: vand.vi v10, v13, 1 ; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; LMULMAX1-NEXT: vmerge.vim v10, v8, 1, v0 +; LMULMAX1-NEXT: vmerge.vim v10, v9, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v9, v10, 4 +; LMULMAX1-NEXT: vslideup.vi v12, v10, 4 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmsne.vi v0, v9, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v12, 1, v0 +; LMULMAX1-NEXT: vmsne.vi v0, v12, 0 +; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX1-NEXT: 
vfncvt.rtz.x.f.w v10, v11 ; LMULMAX1-NEXT: vand.vi v10, v10, 1 ; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v9, v8, 6 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmsne.vi v0, v9, 0 +; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 ; LMULMAX1-NEXT: ret %z = fptosi <8 x double> %x to <8 x i1> ret <8 x i1> %z @@ -625,54 +593,46 @@ define <8 x i1> @fp2ui_v8f64_v8i1(<8 x double> %x) { ; ; LMULMAX1-LABEL: fp2ui_v8f64_v8i1: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmclr.m v0 -; LMULMAX1-NEXT: vmv.v.i v12, 0 -; LMULMAX1-NEXT: vmerge.vim v13, v12, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v14, v8 -; LMULMAX1-NEXT: vand.vi v8, v14, 1 +; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; LMULMAX1-NEXT: vand.vi v8, v12, 1 ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v14, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v13, v14, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmsne.vi v0, v13, 0 -; LMULMAX1-NEXT: vmerge.vim v13, v12, 1, v0 +; LMULMAX1-NEXT: vmv.v.i v8, 0 +; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v14, v9 -; LMULMAX1-NEXT: vand.vi v9, v14, 1 +; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v13, v9 +; LMULMAX1-NEXT: vand.vi v9, v13, 1 ; LMULMAX1-NEXT: vmsne.vi v0, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; LMULMAX1-NEXT: vmerge.vim v9, v8, 1, v0 +; LMULMAX1-NEXT: vmv.v.i v9, 0 +; LMULMAX1-NEXT: vmerge.vim v13, v9, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v13, v9, 2 +; LMULMAX1-NEXT: vslideup.vi v12, v13, 2 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmsne.vi v0, v13, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v12, 1, v0 +; LMULMAX1-NEXT: vmsne.vi v0, v12, 0 +; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v13, v10 ; LMULMAX1-NEXT: vand.vi v10, v13, 1 ; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; LMULMAX1-NEXT: vmerge.vim v10, v8, 1, v0 +; LMULMAX1-NEXT: vmerge.vim v10, v9, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v9, v10, 4 +; LMULMAX1-NEXT: vslideup.vi v12, v10, 4 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmsne.vi v0, v9, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v12, 1, v0 +; LMULMAX1-NEXT: vmsne.vi v0, v12, 0 +; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v11 ; LMULMAX1-NEXT: vand.vi v10, v10, 1 ; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v9, v8, 6 +; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmsne.vi v0, v9, 0 +; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 ; 
LMULMAX1-NEXT: ret %z = fptoui <8 x double> %x to <8 x i1> ret <8 x i1> %z diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll index 820c955..8d496e4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll @@ -478,29 +478,25 @@ define void @si2fp_v8i64_v8f16(<8 x i64>* %x, <8 x half>* %y) { ; LMULMAX1-NEXT: vfncvt.f.x.w v12, v10 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; LMULMAX1-NEXT: vfncvt.f.f.w v10, v12 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v12, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.f.x.w v10, v11 +; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; LMULMAX1-NEXT: vfncvt.f.x.w v12, v11 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vfncvt.f.f.w v11, v10 +; LMULMAX1-NEXT: vfncvt.f.f.w v11, v12 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v11, 2 +; LMULMAX1-NEXT: vslideup.vi v10, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.f.x.w v10, v9 +; LMULMAX1-NEXT: vfncvt.f.x.w v11, v9 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vfncvt.f.f.w v9, v10 +; LMULMAX1-NEXT: vfncvt.f.f.w v9, v11 ; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v9, 4 +; LMULMAX1-NEXT: vslideup.vi v10, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX1-NEXT: vfncvt.f.x.w v9, v8 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; LMULMAX1-NEXT: vfncvt.f.f.w v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v8, 6 -; LMULMAX1-NEXT: vse16.v v12, (a1) +; LMULMAX1-NEXT: vslideup.vi v10, v8, 6 +; LMULMAX1-NEXT: vse16.v v10, (a1) ; LMULMAX1-NEXT: ret %a = load <8 x i64>, <8 x i64>* %x %d = sitofp <8 x i64> %a to <8 x half> @@ -534,29 +530,25 @@ define void @ui2fp_v8i64_v8f16(<8 x i64>* %x, <8 x half>* %y) { ; LMULMAX1-NEXT: vfncvt.f.xu.w v12, v10 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; LMULMAX1-NEXT: vfncvt.f.f.w v10, v12 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; LMULMAX1-NEXT: vmv.v.i v12, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.f.xu.w v10, v11 +; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; LMULMAX1-NEXT: vfncvt.f.xu.w v12, v11 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vfncvt.f.f.w v11, v10 +; LMULMAX1-NEXT: vfncvt.f.f.w v11, v12 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v11, 2 +; LMULMAX1-NEXT: vslideup.vi v10, v11, 2 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; LMULMAX1-NEXT: vfncvt.f.xu.w v10, v9 +; LMULMAX1-NEXT: vfncvt.f.xu.w v11, v9 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; LMULMAX1-NEXT: vfncvt.f.f.w v9, v10 +; LMULMAX1-NEXT: vfncvt.f.f.w v9, v11 ; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu -; LMULMAX1-NEXT: vslideup.vi v12, v9, 4 +; LMULMAX1-NEXT: vslideup.vi v10, v9, 4 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX1-NEXT: vfncvt.f.xu.w v9, v8 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; LMULMAX1-NEXT: vfncvt.f.f.w v8, v9 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu -; 
LMULMAX1-NEXT: vslideup.vi v12, v8, 6 -; LMULMAX1-NEXT: vse16.v v12, (a1) +; LMULMAX1-NEXT: vslideup.vi v10, v8, 6 +; LMULMAX1-NEXT: vse16.v v10, (a1) ; LMULMAX1-NEXT: ret %a = load <8 x i64>, <8 x i64>* %x %d = uitofp <8 x i64> %a to <8 x half> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index b1bf02d..01a8870 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -152,11 +152,7 @@ define void @insert_v4i32_undef_v2i32_0(<4 x i32>* %vp, <2 x i32>* %svp) { ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu -; CHECK-NEXT: vslideup.vi v9, v8, 0 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %sv = load <2 x i32>, <2 x i32>* %svp %v = call <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32> undef, <2 x i32> %sv, i64 0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll index ed7da45..7919553 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -43,28 +43,24 @@ define void @insertelt_v3i64(<3 x i64>* %x, i64 %y) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: vsetivli zero, 2, e64, m2, tu, mu -; RV32-NEXT: vslideup.vi v10, v8, 0 ; RV32-NEXT: lw a3, 16(a0) ; RV32-NEXT: addi a4, a0, 20 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV32-NEXT: vlse32.v v8, (a4), zero +; RV32-NEXT: vlse32.v v10, (a4), zero ; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; RV32-NEXT: vmv.s.x v8, a3 +; RV32-NEXT: vmv.s.x v10, a3 ; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu -; RV32-NEXT: vslideup.vi v10, v8, 2 +; RV32-NEXT: vslideup.vi v8, v10, 2 ; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, mu -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vslide1up.vx v12, v8, a2 -; RV32-NEXT: vslide1up.vx v8, v12, a1 +; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: vslide1up.vx v12, v10, a2 +; RV32-NEXT: vslide1up.vx v10, v12, a1 ; RV32-NEXT: vsetivli zero, 3, e64, m2, tu, mu -; RV32-NEXT: vslideup.vi v10, v8, 2 +; RV32-NEXT: vslideup.vi v8, v10, 2 ; RV32-NEXT: sw a1, 16(a0) ; RV32-NEXT: sw a2, 20(a0) ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV32-NEXT: vse64.v v10, (a0) +; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v3i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll index c71cabb..89dd2bd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll @@ -213,17 +213,13 @@ define void @trunc_v8i8_v8i32(<8 x i32>* %x, <8 x i8>* %z) { ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; LMULMAX1-NEXT: vmv.v.i v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0 +; LMULMAX1-NEXT: 
vsetvli zero, zero, e16, mf2, ta, mu +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 +; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu -; LMULMAX1-NEXT: vslideup.vi v10, v8, 4 -; LMULMAX1-NEXT: vse8.v v10, (a1) +; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 +; LMULMAX1-NEXT: vse8.v v8, (a1) ; LMULMAX1-NEXT: ret %a = load <8 x i32>, <8 x i32>* %x %b = trunc <8 x i32> %a to <8 x i8> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 615a27c..4af789e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -2173,28 +2173,24 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m ; ; RV64-LABEL: mgather_baseidx_v32i8: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v12, v0 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64-NEXT: vmv1r.v v12, v10 +; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, mu -; RV64-NEXT: vslidedown.vi v14, v10, 16 -; RV64-NEXT: vslidedown.vi v16, v8, 16 +; RV64-NEXT: vslidedown.vi v10, v10, 16 +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vsext.vf8 v24, v16 +; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV64-NEXT: vluxei64.v v14, (a0), v24, v0.t -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vsext.vf8 v16, v8 -; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t ; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vsetivli zero, 16, e8, m2, tu, mu -; RV64-NEXT: vslideup.vi v8, v10, 0 ; RV64-NEXT: vsetvli zero, a0, e8, m2, tu, mu -; RV64-NEXT: vslideup.vi v8, v14, 16 +; RV64-NEXT: vslideup.vi v12, v10, 16 +; RV64-NEXT: vmv2r.v v8, v12 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs %v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru) -- 2.7.4