From 80b9510806cf11c57f2dd87191d3989fc45defa8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 10 May 2021 09:34:32 -0700 Subject: [PATCH] [RISCV] Correct VL for fixed length masked scatter. We were incorrectly calling getVectorNumElements on a scalable vector type, which shouldn't be allowed. Doing so gives a warning on EVT, but not on MVT. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 14 +- .../RISCV/rvv/fixed-vectors-masked-scatter.ll | 257 +++++++++------------ 2 files changed, 111 insertions(+), 160 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 9f2e5a0..05ceafe 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4437,21 +4437,23 @@ SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op, if (VT.isFixedLengthVector()) { // We need to use the larger of the value and index type to determine the // scalable type to use so we don't increase LMUL for any operand/result. 
+ MVT ContainerVT; if (VT.bitsGE(IndexVT)) { - VT = getContainerForFixedLengthVector(VT); + ContainerVT = getContainerForFixedLengthVector(VT); IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), - VT.getVectorElementCount()); + ContainerVT.getVectorElementCount()); } else { IndexVT = getContainerForFixedLengthVector(IndexVT); - VT = MVT::getVectorVT(VT.getVectorElementType(), - IndexVT.getVectorElementCount()); + ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), + IndexVT.getVectorElementCount()); } Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); - Val = convertToScalableVector(VT, Val, DAG, Subtarget); + Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); if (!IsUnmasked) { - MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); + MVT MaskVT = + MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index 0b83e6e..bd43446 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -9,7 +9,7 @@ declare void @llvm.masked.scatter.v1i8.v1p0i8(<1 x i8>, <1 x i8*>, i32, <1 x i1> define void @mscatter_v1i8(<1 x i8> %val, <1 x i8*> %ptrs, <1 x i1> %m) { ; RV32-LABEL: mscatter_v1i8: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e8,mf4,ta,mu +; RV32-NEXT: vsetivli a0, 1, e8,mf4,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; @@ -33,7 +33,7 @@ define void @mscatter_v2i8(<2 x i8> %val, <2 x i8*> %ptrs, <2 x i1> %m) { ; ; RV64-LABEL: mscatter_v2i8: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 1, e8,mf8,ta,mu +; RV64-NEXT: vsetivli a0, 2, e8,mf8,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8> %val, <2 x i8*> %ptrs, i32 1, <2 x i1> %m) @@ 
-53,7 +53,7 @@ define void @mscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x i8*> %ptrs, <2 ; RV64: # %bb.0: ; RV64-NEXT: vsetivli a0, 2, e8,mf2,ta,mu ; RV64-NEXT: vnsrl.wi v25, v8, 0 -; RV64-NEXT: vsetivli a0, 1, e8,mf8,ta,mu +; RV64-NEXT: vsetivli a0, 2, e8,mf8,ta,mu ; RV64-NEXT: vsoxei64.v v25, (zero), v9, v0.t ; RV64-NEXT: ret %tval = trunc <2 x i16> %val to <2 x i8> @@ -77,7 +77,7 @@ define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x i8*> %ptrs, <2 ; RV64-NEXT: vnsrl.wi v25, v8, 0 ; RV64-NEXT: vsetivli a0, 2, e8,mf4,ta,mu ; RV64-NEXT: vnsrl.wi v26, v25, 0 -; RV64-NEXT: vsetivli a0, 1, e8,mf8,ta,mu +; RV64-NEXT: vsetivli a0, 2, e8,mf8,ta,mu ; RV64-NEXT: vsoxei64.v v26, (zero), v9, v0.t ; RV64-NEXT: ret %tval = trunc <2 x i32> %val to <2 x i8> @@ -106,7 +106,6 @@ define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x i8*> %ptrs, <2 ; RV64-NEXT: vnsrl.wi v26, v25, 0 ; RV64-NEXT: vsetivli a0, 2, e8,mf8,ta,mu ; RV64-NEXT: vnsrl.wi v25, v26, 0 -; RV64-NEXT: vsetivli a0, 1, e8,mf8,ta,mu ; RV64-NEXT: vsoxei64.v v25, (zero), v9, v0.t ; RV64-NEXT: ret %tval = trunc <2 x i64> %val to <2 x i8> @@ -119,13 +118,13 @@ declare void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8>, <4 x i8*>, i32, <4 x i1> define void @mscatter_v4i8(<4 x i8> %val, <4 x i8*> %ptrs, <4 x i1> %m) { ; RV32-LABEL: mscatter_v4i8: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e8,mf4,ta,mu +; RV32-NEXT: vsetivli a0, 4, e8,mf4,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v4i8: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 2, e8,mf4,ta,mu +; RV64-NEXT: vsetivli a0, 4, e8,mf4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %val, <4 x i8*> %ptrs, i32 1, <4 x i1> %m) @@ -135,13 +134,13 @@ define void @mscatter_v4i8(<4 x i8> %val, <4 x i8*> %ptrs, <4 x i1> %m) { define void @mscatter_truemask_v4i8(<4 x i8> %val, <4 x i8*> %ptrs) { ; RV32-LABEL: mscatter_truemask_v4i8: ; 
RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e8,mf4,ta,mu +; RV32-NEXT: vsetivli a0, 4, e8,mf4,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9 ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_truemask_v4i8: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 2, e8,mf4,ta,mu +; RV64-NEXT: vsetivli a0, 4, e8,mf4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v10 ; RV64-NEXT: ret %mhead = insertelement <4 x i1> undef, i1 1, i32 0 @@ -167,13 +166,13 @@ declare void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8>, <8 x i8*>, i32, <8 x i1> define void @mscatter_v8i8(<8 x i8> %val, <8 x i8*> %ptrs, <8 x i1> %m) { ; RV32-LABEL: mscatter_v8i8: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 4, e8,mf2,ta,mu +; RV32-NEXT: vsetivli a0, 8, e8,mf2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v8i8: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 4, e8,mf2,ta,mu +; RV64-NEXT: vsetivli a0, 8, e8,mf2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> %val, <8 x i8*> %ptrs, i32 1, <8 x i1> %m) @@ -185,7 +184,7 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, i8* %base, <8 x i8> %idxs, <8 ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf4 v26, v9 -; RV32-NEXT: vsetivli a1, 4, e8,mf2,ta,mu +; RV32-NEXT: vsetivli a1, 8, e8,mf2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -193,7 +192,7 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, i8* %base, <8 x i8> %idxs, <8 ; RV64: # %bb.0: ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf8 v28, v9 -; RV64-NEXT: vsetivli a1, 4, e8,mf2,ta,mu +; RV64-NEXT: vsetivli a1, 8, e8,mf2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, i8* %base, <8 x i8> %idxs @@ -206,7 +205,7 @@ declare void @llvm.masked.scatter.v1i16.v1p0i16(<1 x i16>, <1 x i16*>, i32, <1 x define void @mscatter_v1i16(<1 x i16> %val, <1 x i16*> %ptrs, <1 x i1> %m) { ; 
RV32-LABEL: mscatter_v1i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; RV32-NEXT: vsetivli a0, 1, e16,mf2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; @@ -230,7 +229,7 @@ define void @mscatter_v2i16(<2 x i16> %val, <2 x i16*> %ptrs, <2 x i1> %m) { ; ; RV64-LABEL: mscatter_v2i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 1, e16,mf4,ta,mu +; RV64-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16> %val, <2 x i16*> %ptrs, i32 2, <2 x i1> %m) @@ -249,7 +248,7 @@ define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x i16*> %ptrs, < ; RV64: # %bb.0: ; RV64-NEXT: vsetivli a0, 2, e16,mf2,ta,mu ; RV64-NEXT: vnsrl.wi v25, v8, 0 -; RV64-NEXT: vsetivli a0, 1, e16,mf4,ta,mu +; RV64-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; RV64-NEXT: vsoxei64.v v25, (zero), v9, v0.t ; RV64-NEXT: ret %tval = trunc <2 x i32> %val to <2 x i16> @@ -274,7 +273,6 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x i16*> %ptrs, < ; RV64-NEXT: vnsrl.wi v25, v8, 0 ; RV64-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; RV64-NEXT: vnsrl.wi v26, v25, 0 -; RV64-NEXT: vsetivli a0, 1, e16,mf4,ta,mu ; RV64-NEXT: vsoxei64.v v26, (zero), v9, v0.t ; RV64-NEXT: ret %tval = trunc <2 x i64> %val to <2 x i16> @@ -287,13 +285,13 @@ declare void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, i32, <4 x define void @mscatter_v4i16(<4 x i16> %val, <4 x i16*> %ptrs, <4 x i1> %m) { ; RV32-LABEL: mscatter_v4i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; RV32-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v4i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; RV64-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %val, <4 x i16*> %ptrs, i32 
2, <4 x i1> %m) @@ -303,13 +301,13 @@ define void @mscatter_v4i16(<4 x i16> %val, <4 x i16*> %ptrs, <4 x i1> %m) { define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x i16*> %ptrs) { ; RV32-LABEL: mscatter_truemask_v4i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; RV32-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9 ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_truemask_v4i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; RV64-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v10 ; RV64-NEXT: ret %mhead = insertelement <4 x i1> undef, i1 1, i32 0 @@ -335,13 +333,13 @@ declare void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16>, <8 x i16*>, i32, <8 x define void @mscatter_v8i16(<8 x i16> %val, <8 x i16*> %ptrs, <8 x i1> %m) { ; RV32-LABEL: mscatter_v8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; RV32-NEXT: vsetivli a0, 8, e16,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; RV64-NEXT: vsetivli a0, 8, e16,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %val, <8 x i16*> %ptrs, i32 2, <8 x i1> %m) @@ -354,7 +352,7 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, i16* %base, <8 x i8> %i ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf4 v26, v9 ; RV32-NEXT: vsll.vi v26, v26, 1 -; RV32-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -363,7 +361,7 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, i16* %base, <8 x i8> %i ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf8 v28, v9 ; RV64-NEXT: vsll.vi v28, v28, 1 -; RV64-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), 
v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i16, i16* %base, <8 x i8> %idxs @@ -377,7 +375,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, i16* %base, <8 x i ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf4 v26, v9 ; RV32-NEXT: vsll.vi v26, v26, 1 -; RV32-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -386,7 +384,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, i16* %base, <8 x i ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf8 v28, v9 ; RV64-NEXT: vsll.vi v28, v28, 1 -; RV64-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = sext <8 x i8> %idxs to <8 x i16> @@ -401,7 +399,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, i16* %base, <8 x i ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vzext.vf4 v26, v9 ; RV32-NEXT: vsll.vi v26, v26, 1 -; RV32-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -410,7 +408,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, i16* %base, <8 x i ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vzext.vf8 v28, v9 ; RV64-NEXT: vsll.vi v28, v28, 1 -; RV64-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i16> @@ -425,7 +423,7 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, i16* %base, <8 x i16> %idxs, ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf2 v26, v9 ; RV32-NEXT: vsll.vi v26, v26, 1 -; RV32-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -434,7 +432,7 @@ define void 
@mscatter_baseidx_v8i16(<8 x i16> %val, i16* %base, <8 x i16> %idxs, ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf4 v28, v9 ; RV64-NEXT: vsll.vi v28, v28, 1 -; RV64-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i16, i16* %base, <8 x i16> %idxs @@ -447,7 +445,7 @@ declare void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32>, <1 x i32*>, i32, <1 x define void @mscatter_v1i32(<1 x i32> %val, <1 x i32*> %ptrs, <1 x i1> %m) { ; RV32-LABEL: mscatter_v1i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; RV32-NEXT: vsetivli a0, 1, e32,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; @@ -471,7 +469,7 @@ define void @mscatter_v2i32(<2 x i32> %val, <2 x i32*> %ptrs, <2 x i1> %m) { ; ; RV64-LABEL: mscatter_v2i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 1, e32,mf2,ta,mu +; RV64-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %val, <2 x i32*> %ptrs, i32 4, <2 x i1> %m) @@ -491,7 +489,6 @@ define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x i32*> %ptrs, < ; RV64: # %bb.0: ; RV64-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; RV64-NEXT: vnsrl.wi v25, v8, 0 -; RV64-NEXT: vsetivli a0, 1, e32,mf2,ta,mu ; RV64-NEXT: vsoxei64.v v25, (zero), v9, v0.t ; RV64-NEXT: ret %tval = trunc <2 x i64> %val to <2 x i32> @@ -504,13 +501,13 @@ declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x define void @mscatter_v4i32(<4 x i32> %val, <4 x i32*> %ptrs, <4 x i1> %m) { ; RV32-LABEL: mscatter_v4i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; RV32-NEXT: vsetivli a0, 4, e32,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v4i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; RV64-NEXT: vsetivli a0, 4, 
e32,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %ptrs, i32 4, <4 x i1> %m) @@ -520,13 +517,13 @@ define void @mscatter_v4i32(<4 x i32> %val, <4 x i32*> %ptrs, <4 x i1> %m) { define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x i32*> %ptrs) { ; RV32-LABEL: mscatter_truemask_v4i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; RV32-NEXT: vsetivli a0, 4, e32,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9 ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_truemask_v4i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; RV64-NEXT: vsetivli a0, 4, e32,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v10 ; RV64-NEXT: ret %mhead = insertelement <4 x i1> undef, i1 1, i32 0 @@ -552,13 +549,13 @@ declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32, <8 x define void @mscatter_v8i32(<8 x i32> %val, <8 x i32*> %ptrs, <8 x i1> %m) { ; RV32-LABEL: mscatter_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 4, e32,m2,ta,mu +; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 4, e32,m2,ta,mu +; RV64-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %val, <8 x i32*> %ptrs, i32 4, <8 x i1> %m) @@ -571,7 +568,6 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, i32* %base, <8 x i8> %i ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf4 v26, v10 ; RV32-NEXT: vsll.vi v26, v26, 2 -; RV32-NEXT: vsetivli a1, 4, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -580,7 +576,7 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, i32* %base, <8 x i8> %i ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf8 v28, v10 ; RV64-NEXT: vsll.vi v28, v28, 2 -; 
RV64-NEXT: vsetivli a1, 4, e32,m2,ta,mu +; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i32, i32* %base, <8 x i8> %idxs @@ -594,7 +590,6 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, i32* %base, <8 x i ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf4 v26, v10 ; RV32-NEXT: vsll.vi v26, v26, 2 -; RV32-NEXT: vsetivli a1, 4, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -603,7 +598,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, i32* %base, <8 x i ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf8 v28, v10 ; RV64-NEXT: vsll.vi v28, v28, 2 -; RV64-NEXT: vsetivli a1, 4, e32,m2,ta,mu +; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = sext <8 x i8> %idxs to <8 x i32> @@ -618,7 +613,6 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, i32* %base, <8 x i ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vzext.vf4 v26, v10 ; RV32-NEXT: vsll.vi v26, v26, 2 -; RV32-NEXT: vsetivli a1, 4, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -627,7 +621,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, i32* %base, <8 x i ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vzext.vf8 v28, v10 ; RV64-NEXT: vsll.vi v28, v28, 2 -; RV64-NEXT: vsetivli a1, 4, e32,m2,ta,mu +; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i32> @@ -642,7 +636,6 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, i32* %base, <8 x i16> ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf2 v26, v10 ; RV32-NEXT: vsll.vi v26, v26, 2 -; RV32-NEXT: vsetivli a1, 4, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -651,7 +644,7 @@ define void 
@mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, i32* %base, <8 x i16> ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf4 v28, v10 ; RV64-NEXT: vsll.vi v28, v28, 2 -; RV64-NEXT: vsetivli a1, 4, e32,m2,ta,mu +; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i32, i32* %base, <8 x i16> %idxs @@ -665,7 +658,6 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, i32* %base, <8 x ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf2 v26, v10 ; RV32-NEXT: vsll.vi v26, v26, 2 -; RV32-NEXT: vsetivli a1, 4, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -674,7 +666,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, i32* %base, <8 x ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf4 v28, v10 ; RV64-NEXT: vsll.vi v28, v28, 2 -; RV64-NEXT: vsetivli a1, 4, e32,m2,ta,mu +; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = sext <8 x i16> %idxs to <8 x i32> @@ -689,7 +681,6 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, i32* %base, <8 x ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vzext.vf2 v26, v10 ; RV32-NEXT: vsll.vi v26, v26, 2 -; RV32-NEXT: vsetivli a1, 4, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -698,7 +689,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, i32* %base, <8 x ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vzext.vf4 v28, v10 ; RV64-NEXT: vsll.vi v28, v28, 2 -; RV64-NEXT: vsetivli a1, 4, e32,m2,ta,mu +; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> @@ -712,7 +703,6 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, i32* %base, <8 x i32> %idxs, ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsll.vi v26, v10, 2 
-; RV32-NEXT: vsetivli a1, 4, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -721,7 +711,7 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, i32* %base, <8 x i32> %idxs, ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf2 v28, v10 ; RV64-NEXT: vsll.vi v28, v28, 2 -; RV64-NEXT: vsetivli a1, 4, e32,m2,ta,mu +; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i32, i32* %base, <8 x i32> %idxs @@ -752,13 +742,13 @@ declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64>, <2 x i64*>, i32, <2 x define void @mscatter_v2i64(<2 x i64> %val, <2 x i64*> %ptrs, <2 x i1> %m) { ; RV32-LABEL: mscatter_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV64-NEXT: vsetivli a0, 2, e64,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> %val, <2 x i64*> %ptrs, i32 8, <2 x i1> %m) @@ -770,13 +760,13 @@ declare void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64>, <4 x i64*>, i32, <4 x define void @mscatter_v4i64(<4 x i64> %val, <4 x i64*> %ptrs, <4 x i1> %m) { ; RV32-LABEL: mscatter_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e64,m2,ta,mu +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 2, e64,m2,ta,mu +; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> %val, <4 x i64*> %ptrs, i32 8, <4 x i1> %m) @@ -786,13 +776,13 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x i64*> %ptrs, <4 x i1> %m) { define void 
@mscatter_truemask_v4i64(<4 x i64> %val, <4 x i64*> %ptrs) { ; RV32-LABEL: mscatter_truemask_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e64,m2,ta,mu +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v10 ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_truemask_v4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 2, e64,m2,ta,mu +; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v10 ; RV64-NEXT: ret %mhead = insertelement <4 x i1> undef, i1 1, i32 0 @@ -818,13 +808,13 @@ declare void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64>, <8 x i64*>, i32, <8 x define void @mscatter_v8i64(<8 x i64> %val, <8 x i64*> %ptrs, <8 x i1> %m) { ; RV32-LABEL: mscatter_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 4, e64,m4,ta,mu +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 4, e64,m4,ta,mu +; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> %val, <8 x i64*> %ptrs, i32 8, <8 x i1> %m) @@ -837,7 +827,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %i ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf4 v26, v12 ; RV32-NEXT: vsll.vi v26, v26, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -846,7 +836,6 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %i ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf8 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, i64* %base, <8 x i8> %idxs @@ -860,7 +849,6 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, 
i64* %base, <8 x i ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf8 v28, v12 ; RV32-NEXT: vsll.vi v28, v28, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -869,7 +857,6 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf8 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = sext <8 x i8> %idxs to <8 x i64> @@ -884,7 +871,6 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf8 v28, v12 ; RV32-NEXT: vsll.vi v28, v28, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -893,7 +879,6 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vzext.vf8 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i64> @@ -908,7 +893,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16> ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf2 v26, v12 ; RV32-NEXT: vsll.vi v26, v26, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -917,7 +902,6 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16> ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf4 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, i64* %base, <8 x i16> %idxs @@ -931,7 
+915,6 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf4 v28, v12 ; RV32-NEXT: vsll.vi v28, v28, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -940,7 +923,6 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf4 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = sext <8 x i16> %idxs to <8 x i64> @@ -955,7 +937,6 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf4 v28, v12 ; RV32-NEXT: vsll.vi v28, v28, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -964,7 +945,6 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vzext.vf4 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i64> @@ -978,7 +958,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32> ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsll.vi v26, v12, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -987,7 +967,6 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32> ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf2 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr 
inbounds i64, i64* %base, <8 x i32> %idxs @@ -1001,7 +980,6 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf2 v28, v12 ; RV32-NEXT: vsll.vi v28, v28, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1010,7 +988,6 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf2 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = sext <8 x i32> %idxs to <8 x i64> @@ -1025,7 +1002,6 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf2 v28, v12 ; RV32-NEXT: vsll.vi v28, v28, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1034,7 +1010,6 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vzext.vf2 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i32> %idxs to <8 x i64> @@ -1048,7 +1023,6 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, i64* %base, <8 x i64> %idxs, ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsll.vi v28, v12, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1056,7 +1030,6 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, i64* %base, <8 x i64> %idxs, ; RV64: # %bb.0: ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsll.vi v28, v12, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = 
getelementptr inbounds i64, i64* %base, <8 x i64> %idxs @@ -1069,7 +1042,7 @@ declare void @llvm.masked.scatter.v1f16.v1p0f16(<1 x half>, <1 x half*>, i32, <1 define void @mscatter_v1f16(<1 x half> %val, <1 x half*> %ptrs, <1 x i1> %m) { ; RV32-LABEL: mscatter_v1f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; RV32-NEXT: vsetivli a0, 1, e16,mf2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; @@ -1093,7 +1066,7 @@ define void @mscatter_v2f16(<2 x half> %val, <2 x half*> %ptrs, <2 x i1> %m) { ; ; RV64-LABEL: mscatter_v2f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 1, e16,mf4,ta,mu +; RV64-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v2f16.v2p0f16(<2 x half> %val, <2 x half*> %ptrs, i32 2, <2 x i1> %m) @@ -1105,13 +1078,13 @@ declare void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half>, <4 x half*>, i32, <4 define void @mscatter_v4f16(<4 x half> %val, <4 x half*> %ptrs, <4 x i1> %m) { ; RV32-LABEL: mscatter_v4f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; RV32-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v4f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; RV64-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half> %val, <4 x half*> %ptrs, i32 2, <4 x i1> %m) @@ -1121,13 +1094,13 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x half*> %ptrs, <4 x i1> %m) { define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x half*> %ptrs) { ; RV32-LABEL: mscatter_truemask_v4f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e16,mf2,ta,mu +; RV32-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9 ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_truemask_v4f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 2, e16,mf2,ta,mu 
+; RV64-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v10 ; RV64-NEXT: ret %mhead = insertelement <4 x i1> undef, i1 1, i32 0 @@ -1153,13 +1126,13 @@ declare void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half>, <8 x half*>, i32, <8 define void @mscatter_v8f16(<8 x half> %val, <8 x half*> %ptrs, <8 x i1> %m) { ; RV32-LABEL: mscatter_v8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; RV32-NEXT: vsetivli a0, 8, e16,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; RV64-NEXT: vsetivli a0, 8, e16,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %val, <8 x half*> %ptrs, i32 2, <8 x i1> %m) @@ -1172,7 +1145,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, half* %base, <8 x i8> ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf4 v26, v9 ; RV32-NEXT: vsll.vi v26, v26, 1 -; RV32-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -1181,7 +1154,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, half* %base, <8 x i8> ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf8 v28, v9 ; RV64-NEXT: vsll.vi v28, v28, 1 -; RV64-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds half, half* %base, <8 x i8> %idxs @@ -1195,7 +1168,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, half* %base, <8 x ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf4 v26, v9 ; RV32-NEXT: vsll.vi v26, v26, 1 -; RV32-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -1204,7 +1177,7 @@ define void 
@mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, half* %base, <8 x ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf8 v28, v9 ; RV64-NEXT: vsll.vi v28, v28, 1 -; RV64-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = sext <8 x i8> %idxs to <8 x i16> @@ -1219,7 +1192,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, half* %base, <8 x ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vzext.vf4 v26, v9 ; RV32-NEXT: vsll.vi v26, v26, 1 -; RV32-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -1228,7 +1201,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, half* %base, <8 x ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vzext.vf8 v28, v9 ; RV64-NEXT: vsll.vi v28, v28, 1 -; RV64-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i16> @@ -1243,7 +1216,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, half* %base, <8 x i16> %idx ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf2 v26, v9 ; RV32-NEXT: vsll.vi v26, v26, 1 -; RV32-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -1252,7 +1225,7 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, half* %base, <8 x i16> %idx ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf4 v28, v9 ; RV64-NEXT: vsll.vi v28, v28, 1 -; RV64-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds half, half* %base, <8 x i16> %idxs @@ -1265,7 +1238,7 @@ declare void @llvm.masked.scatter.v1f32.v1p0f32(<1 x float>, <1 x float*>, 
i32, define void @mscatter_v1f32(<1 x float> %val, <1 x float*> %ptrs, <1 x i1> %m) { ; RV32-LABEL: mscatter_v1f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; RV32-NEXT: vsetivli a0, 1, e32,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; @@ -1289,7 +1262,7 @@ define void @mscatter_v2f32(<2 x float> %val, <2 x float*> %ptrs, <2 x i1> %m) { ; ; RV64-LABEL: mscatter_v2f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 1, e32,mf2,ta,mu +; RV64-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %val, <2 x float*> %ptrs, i32 4, <2 x i1> %m) @@ -1301,13 +1274,13 @@ declare void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float>, <4 x float*>, i32, define void @mscatter_v4f32(<4 x float> %val, <4 x float*> %ptrs, <4 x i1> %m) { ; RV32-LABEL: mscatter_v4f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; RV32-NEXT: vsetivli a0, 4, e32,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v4f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; RV64-NEXT: vsetivli a0, 4, e32,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %val, <4 x float*> %ptrs, i32 4, <4 x i1> %m) @@ -1317,13 +1290,13 @@ define void @mscatter_v4f32(<4 x float> %val, <4 x float*> %ptrs, <4 x i1> %m) { define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x float*> %ptrs) { ; RV32-LABEL: mscatter_truemask_v4f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; RV32-NEXT: vsetivli a0, 4, e32,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9 ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_truemask_v4f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; RV64-NEXT: vsetivli a0, 4, e32,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v10 ; RV64-NEXT: ret %mhead = insertelement <4 x i1> undef, i1 1, i32 
0 @@ -1349,13 +1322,13 @@ declare void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float>, <8 x float*>, i32, define void @mscatter_v8f32(<8 x float> %val, <8 x float*> %ptrs, <8 x i1> %m) { ; RV32-LABEL: mscatter_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 4, e32,m2,ta,mu +; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 4, e32,m2,ta,mu +; RV64-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %val, <8 x float*> %ptrs, i32 4, <8 x i1> %m) @@ -1368,7 +1341,6 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, float* %base, <8 x i8 ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf4 v26, v10 ; RV32-NEXT: vsll.vi v26, v26, 2 -; RV32-NEXT: vsetivli a1, 4, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -1377,7 +1349,7 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, float* %base, <8 x i8 ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf8 v28, v10 ; RV64-NEXT: vsll.vi v28, v28, 2 -; RV64-NEXT: vsetivli a1, 4, e32,m2,ta,mu +; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds float, float* %base, <8 x i8> %idxs @@ -1391,7 +1363,6 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, float* %base, <8 ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf4 v26, v10 ; RV32-NEXT: vsll.vi v26, v26, 2 -; RV32-NEXT: vsetivli a1, 4, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -1400,7 +1371,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, float* %base, <8 ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf8 v28, v10 ; RV64-NEXT: vsll.vi v28, v28, 2 -; RV64-NEXT: vsetivli a1, 4, e32,m2,ta,mu +; 
RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = sext <8 x i8> %idxs to <8 x i32> @@ -1415,7 +1386,6 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, float* %base, <8 ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vzext.vf4 v26, v10 ; RV32-NEXT: vsll.vi v26, v26, 2 -; RV32-NEXT: vsetivli a1, 4, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -1424,7 +1394,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, float* %base, <8 ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vzext.vf8 v28, v10 ; RV64-NEXT: vsll.vi v28, v28, 2 -; RV64-NEXT: vsetivli a1, 4, e32,m2,ta,mu +; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i32> @@ -1439,7 +1409,6 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, float* %base, <8 x i ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf2 v26, v10 ; RV32-NEXT: vsll.vi v26, v26, 2 -; RV32-NEXT: vsetivli a1, 4, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -1448,7 +1417,7 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, float* %base, <8 x i ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf4 v28, v10 ; RV64-NEXT: vsll.vi v28, v28, 2 -; RV64-NEXT: vsetivli a1, 4, e32,m2,ta,mu +; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds float, float* %base, <8 x i16> %idxs @@ -1462,7 +1431,6 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, float* %base, < ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf2 v26, v10 ; RV32-NEXT: vsll.vi v26, v26, 2 -; RV32-NEXT: vsetivli a1, 4, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -1471,7 +1439,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> 
%val, float* %base, < ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf4 v28, v10 ; RV64-NEXT: vsll.vi v28, v28, 2 -; RV64-NEXT: vsetivli a1, 4, e32,m2,ta,mu +; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = sext <8 x i16> %idxs to <8 x i32> @@ -1486,7 +1454,6 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, float* %base, < ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vzext.vf2 v26, v10 ; RV32-NEXT: vsll.vi v26, v26, 2 -; RV32-NEXT: vsetivli a1, 4, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -1495,7 +1462,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, float* %base, < ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vzext.vf4 v28, v10 ; RV64-NEXT: vsll.vi v28, v28, 2 -; RV64-NEXT: vsetivli a1, 4, e32,m2,ta,mu +; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> @@ -1509,7 +1476,6 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, float* %base, <8 x i32> %i ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsll.vi v26, v10, 2 -; RV32-NEXT: vsetivli a1, 4, e32,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -1518,7 +1484,7 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, float* %base, <8 x i32> %i ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf2 v28, v10 ; RV64-NEXT: vsll.vi v28, v28, 2 -; RV64-NEXT: vsetivli a1, 4, e32,m2,ta,mu +; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds float, float* %base, <8 x i32> %idxs @@ -1549,13 +1515,13 @@ declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32 define void @mscatter_v2f64(<2 x double> %val, <2 x double*> %ptrs, <2 x i1> %m) { ; RV32-LABEL: mscatter_v2f64: ; RV32: # %bb.0: -; 
RV32-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v2f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV64-NEXT: vsetivli a0, 2, e64,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 8, <2 x i1> %m) @@ -1567,13 +1533,13 @@ declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double>, <4 x double*>, i32 define void @mscatter_v4f64(<4 x double> %val, <4 x double*> %ptrs, <4 x i1> %m) { ; RV32-LABEL: mscatter_v4f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e64,m2,ta,mu +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v4f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 2, e64,m2,ta,mu +; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %val, <4 x double*> %ptrs, i32 8, <4 x i1> %m) @@ -1583,13 +1549,13 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x double*> %ptrs, <4 x i1> %m) define void @mscatter_truemask_v4f64(<4 x double> %val, <4 x double*> %ptrs) { ; RV32-LABEL: mscatter_truemask_v4f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli a0, 2, e64,m2,ta,mu +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v10 ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_truemask_v4f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 2, e64,m2,ta,mu +; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v10 ; RV64-NEXT: ret %mhead = insertelement <4 x i1> undef, i1 1, i32 0 @@ -1615,13 +1581,13 @@ declare void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double>, <8 x double*>, i32 define void @mscatter_v8f64(<8 x double> %val, <8 x double*> %ptrs, <8 x i1> %m) { ; RV32-LABEL: mscatter_v8f64: ; RV32: # 
%bb.0: -; RV32-NEXT: vsetivli a0, 4, e64,m4,ta,mu +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli a0, 4, e64,m4,ta,mu +; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t ; RV64-NEXT: ret call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, i32 8, <8 x i1> %m) @@ -1634,7 +1600,7 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, double* %base, <8 x ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf4 v26, v12 ; RV32-NEXT: vsll.vi v26, v26, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -1643,7 +1609,6 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, double* %base, <8 x ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf8 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, double* %base, <8 x i8> %idxs @@ -1657,7 +1622,6 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, double* %base, ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf8 v28, v12 ; RV32-NEXT: vsll.vi v28, v28, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1666,7 +1630,6 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, double* %base, ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf8 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = sext <8 x i8> %idxs to <8 x i64> @@ -1681,7 +1644,6 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, double* %base, ; RV32-NEXT: vsetivli a1, 8, 
e64,m4,ta,mu ; RV32-NEXT: vzext.vf8 v28, v12 ; RV32-NEXT: vsll.vi v28, v28, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1690,7 +1652,6 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, double* %base, ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vzext.vf8 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i64> @@ -1705,7 +1666,7 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, double* %base, <8 x ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsext.vf2 v26, v12 ; RV32-NEXT: vsll.vi v26, v26, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -1714,7 +1675,6 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, double* %base, <8 x ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf4 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, double* %base, <8 x i16> %idxs @@ -1728,7 +1688,6 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, double* %base, ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf4 v28, v12 ; RV32-NEXT: vsll.vi v28, v28, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1737,7 +1696,6 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, double* %base, ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf4 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = sext <8 x i16> %idxs to <8 x i64> @@ -1752,7 +1710,6 @@ define void 
@mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, double* %base, ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf4 v28, v12 ; RV32-NEXT: vsll.vi v28, v28, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1761,7 +1718,6 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, double* %base, ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vzext.vf4 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i64> @@ -1775,7 +1731,7 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, double* %base, <8 x ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; RV32-NEXT: vsll.vi v26, v12, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu +; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v26, v0.t ; RV32-NEXT: ret ; @@ -1784,7 +1740,6 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, double* %base, <8 x ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf2 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, double* %base, <8 x i32> %idxs @@ -1798,7 +1753,6 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, double* %base, ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf2 v28, v12 ; RV32-NEXT: vsll.vi v28, v28, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1807,7 +1761,6 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, double* %base, ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsext.vf2 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret 
%eidxs = sext <8 x i32> %idxs to <8 x i64> @@ -1822,7 +1775,6 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, double* %base, ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf2 v28, v12 ; RV32-NEXT: vsll.vi v28, v28, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1831,7 +1783,6 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, double* %base, ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vzext.vf2 v28, v12 ; RV64-NEXT: vsll.vi v28, v28, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i32> %idxs to <8 x i64> @@ -1845,7 +1796,6 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, double* %base, <8 x i64> ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsll.vi v28, v12, 3 -; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1853,7 +1803,6 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, double* %base, <8 x i64> ; RV64: # %bb.0: ; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV64-NEXT: vsll.vi v28, v12, 3 -; RV64-NEXT: vsetivli a1, 4, e64,m4,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, double* %base, <8 x i64> %idxs @@ -1868,7 +1817,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs, ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu ; RV32-NEXT: vsext.vf4 v28, v9 -; RV32-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1876,7 +1825,7 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs, ; RV64: # %bb.0: ; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu ; RV64-NEXT: vsext.vf8 v16, v9 -; RV64-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; RV64-NEXT: vsetivli a1, 16, 
e8,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> %idxs @@ -1890,9 +1839,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs, ; RV32-LABEL: mscatter_baseidx_v32i8: ; RV32: # %bb.0: ; RV32-NEXT: addi a1, zero, 32 -; RV32-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; RV32-NEXT: vsetvli a2, a1, e32,m8,ta,mu ; RV32-NEXT: vsext.vf4 v16, v10 -; RV32-NEXT: vsetivli a1, 16, e8,m2,ta,mu +; RV32-NEXT: vsetvli a1, a1, e8,m2,ta,mu ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; @@ -1900,7 +1849,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs, ; RV64: # %bb.0: ; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu ; RV64-NEXT: vsext.vf8 v16, v10 -; RV64-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: vsetivli a1, 16, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 16 @@ -1909,7 +1858,7 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs, ; RV64-NEXT: vsext.vf8 v8, v28 ; RV64-NEXT: vsetivli a1, 2, e8,m1,ta,mu ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu ; RV64-NEXT: vsoxei64.v v26, (a0), v8, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs -- 2.7.4