From badf11de4ac63081180893aa757bbafd1e672132 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Wed, 31 May 2023 14:29:28 +0000
Subject: [PATCH] [RISCV][InsertVSETVLI] Avoid vmv.s.x SEW toggle if at start of block

vmv.s.x/vfmv.s.f instructions that only write to the first destination
element can use any SEW greater than or equal to their original SEW,
provided that they are writing to an implicit_def operand where we can
clobber the other lanes.

We were already handling this in needVSETVLI, which meant that when
scanning the instructions from top to bottom we could detect this and
avoid the toggle:

    vsetivli zero, 4, e64, mf2, ta, ma
    li a0, 11
    vsetivli zero, 1, e8, mf8, ta, ma
    vmv.s.x v0, a0

    ->

    vsetivli zero, 4, e64, mf2, ta, ma
    li a0, 11
    vmv.s.x v0, a0

The issue that this patch aims to solve arises when the vmv.s.x is the
first vector instruction in the block and doesn't have any prior
predecessor info:

    entry_bb:
      li a0, 11
      ; No previous state here: forced to set VL/VTYPE
      vsetivli zero, 1, e8, mf8, ta, ma
      vmv.s.x v0, a0
      vsetivli zero, 4, e16, mf2, ta, ma
      vmerge.vvm v8, v9, v8, v0

doLocalPostpass can work backwards from bottom to top and work out if an
earlier vsetvli can be mutated to avoid a toggle. It uses DemandedFields
and getDemanded for this, which previously didn't take into account the
possibility of going to a larger SEW.

A previous patch consolidated the vmv.s.x logic from needVSETVLI into
getDemanded, and this patch removes the gate around it so that
doLocalPostpass can now delete vsetvlis like in the scenario below:

    entry_bb:
      li a0, 11
      ; Previous vsetivli mutated: second one deleted
      vsetivli zero, 4, e16, mf2, ta, ma
      vmv.s.x v0, a0
      vmerge.vvm v8, v9, v8, v0
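To make the relaxed demand concrete, below is a small, self-contained C++
sketch. It is illustrative only: SEWDemand and sewCompatible are simplified
stand-ins, not the pass's actual DemandedFields bookkeeping. A scalar move
into an implicit_def only demands "SEW greater than or equal to" its original
SEW, while an ordinary instruction demands an exact match, which is what lets
an earlier vsetvli with a larger SEW satisfy the vmv.s.x.

    // Illustrative sketch only: simplified stand-ins, not the pass's real types.
    #include <cassert>

    enum class SEWDemand { Equal, GreaterThanOrEqual };

    // Does a configuration that established CurSEW satisfy an instruction
    // that was selected with InstrSEW, given how strongly it demands SEW?
    static bool sewCompatible(unsigned CurSEW, unsigned InstrSEW,
                              SEWDemand Demand) {
      if (Demand == SEWDemand::GreaterThanOrEqual)
        return CurSEW >= InstrSEW;
      return CurSEW == InstrSEW;
    }

    int main() {
      // vmv.s.x selected with SEW=8 into an implicit_def: any SEW >= 8 works,
      // so a surrounding e16 configuration can be reused and the extra
      // vsetivli dropped.
      assert(sewCompatible(16, 8, SEWDemand::GreaterThanOrEqual));
      // An ordinary instruction still demands its exact SEW.
      assert(!sewCompatible(16, 8, SEWDemand::Equal));
      return 0;
    }

This mirrors the SEWGreaterThanOrEqual demand that getDemanded now reports
for scalar moves into implicit_def, which doLocalPostpass can exploit when
coalescing vsetvlis bottom-up.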
Differential Revision: https://reviews.llvm.org/D151561
---
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp       | 11 ++++----
 .../RISCV/rvv/fixed-vector-shuffle-transpose.ll    | 12 +++------
 .../RISCV/rvv/fixed-vectors-bitreverse-vp.ll       | 18 +++++---------
 .../CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll    | 18 +++++---------
 .../CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll |  6 ++---
 .../CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll | 26 +++++++------------
 .../RISCV/rvv/fixed-vectors-int-shuffles.ll        | 29 ++++++++--------------
 .../CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll   |  3 +--
 8 files changed, 44 insertions(+), 79 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index bd3f38c..8706785 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -259,8 +259,7 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
 
 /// Return the fields and properties demanded by the provided instruction.
 DemandedFields getDemanded(const MachineInstr &MI,
-                           const MachineRegisterInfo *MRI,
-                           bool ExpandVMVSXSEW) {
+                           const MachineRegisterInfo *MRI) {
   // Warning: This function has to work on both the lowered (i.e. post
   // emitVSETVLIs) and pre-lowering forms. The main implication of this is
   // that it can't use the value of a SEW, VL, or Policy operand as they might
@@ -323,7 +322,7 @@ DemandedFields getDemanded(const MachineInstr &MI,
       // tail lanes to either be the original value or -1. We are writing
       // unknown bits to the lanes here.
       auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
-      if (ExpandVMVSXSEW && VRegDef && VRegDef->isImplicitDef()) {
+      if (VRegDef && VRegDef->isImplicitDef()) {
         Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
       }
     }
@@ -888,7 +887,7 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
   if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
     return true;
 
-  DemandedFields Used = getDemanded(MI, MRI, true);
+  DemandedFields Used = getDemanded(MI, MRI);
 
   if (isScalarMoveInstr(MI)) {
     // For vmv.s.x and vfmv.s.f, if writing to an implicit_def operand, we don't
@@ -1408,7 +1407,7 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
 
   for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
 
     if (!isVectorConfigInstr(MI)) {
-      doUnion(Used, getDemanded(MI, MRI, false));
+      doUnion(Used, getDemanded(MI, MRI));
       continue;
     }
@@ -1436,7 +1435,7 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
       }
     }
     NextMI = &MI;
-    Used = getDemanded(MI, MRI, false);
+    Used = getDemanded(MI, MRI);
   }
 
   for (auto *MI : ToDelete)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
index 37f67ca..deceb6f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
@@ -182,9 +182,8 @@ define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
 ; CHECK-LABEL: trn2.v2i32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a0, 2
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vrgather.vi v10, v8, 1
 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
@@ -239,9 +238,8 @@ define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
 ; CHECK-LABEL: trn2.v2i64:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a0, 2
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
+; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vrgather.vi v10, v8, 1
 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v10
@@ -264,9 +262,8 @@ define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) {
 ; CHECK-LABEL: trn2.v2f32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a0, 2
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vrgather.vi v10, v8, 1
 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
@@ -321,9 +318,8 @@ define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) {
 ; CHECK-LABEL: trn2.v2f64:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a0, 2
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
+; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vrgather.vi v10, v8, 1
 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v10
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
index eb45b3c..8bcb74c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
@@ -1400,9 +1400,8 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %e
 ; RV32-NEXT: vand.vx v11, v11, a4, v0.t
 ; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
 ; RV32-NEXT: li a5, 5
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v0, a5
 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.s.x v0, a5
 ; RV32-NEXT: vmv.v.i v13, 0
 ; RV32-NEXT: lui a5, 1044480
 ; RV32-NEXT: vmerge.vxm v13, v13, a5, v0
@@ -1525,9 +1524,8 @@ define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl)
 ; RV32-NEXT: vor.vv v9, v10, v9
 ; RV32-NEXT: vsrl.vi v10, v8, 8
 ; RV32-NEXT: li a4, 5
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v0, a4
 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.s.x v0, a4
 ; RV32-NEXT: vmv.v.i v11, 0
 ; RV32-NEXT: lui a4, 1044480
 ; RV32-NEXT: vmerge.vxm v11, v11, a4, v0
@@ -1660,9 +1658,8 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %e
 ; RV32-NEXT: vand.vx v14, v14, a4, v0.t
 ; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
 ; RV32-NEXT: li a5, 85
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v0, a5
 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.s.x v0, a5
 ; RV32-NEXT: vmv.v.i v18, 0
 ; RV32-NEXT: lui a5, 1044480
 ; RV32-NEXT: vmerge.vxm v18, v18, a5, v0
@@ -1785,9 +1782,8 @@ define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl)
 ; RV32-NEXT: vor.vv v10, v12, v10
 ; RV32-NEXT: vsrl.vi v12, v8, 8
 ; RV32-NEXT: li a4, 85
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v0, a4
 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.s.x v0, a4
 ; RV32-NEXT: vmv.v.i v14, 0
 ; RV32-NEXT: lui a4, 1044480
 ; RV32-NEXT: vmerge.vxm v14, v14, a4, v0
@@ -1921,9 +1917,8 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e
 ; RV32-NEXT: vsrl.vi v28, v8, 8, v0.t
 ; RV32-NEXT: lui a5, 5
 ; RV32-NEXT: addi a5, a5, 1365
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a5
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vmv.s.x v0, a5
 ; RV32-NEXT: vmv.v.i v20, 0
 ; RV32-NEXT: lui a5, 1044480
 ; RV32-NEXT: vmerge.vxm v20, v20, a5, v0
@@ -2047,9 +2042,8 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl)
 ; RV32-NEXT: vsrl.vi v20, v8, 8
 ; RV32-NEXT: lui a4, 5
 ; RV32-NEXT: addi a4, a4, 1365
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a4
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vmv.s.x v0, a4
 ; RV32-NEXT: vmv.v.i v16, 0
 ; RV32-NEXT: lui a4, 1044480
 ; RV32-NEXT: vmerge.vxm v16, v16, a4, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
index 9280565..590c631 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
@@ -440,9 +440,8 @@ define <2 x i64> @vp_bswap_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT: vand.vx v11, v11, a4, v0.t
 ; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t
 ; RV32-NEXT: li a5, 5
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v0, a5
 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.s.x v0, a5
 ; RV32-NEXT: vmv.v.i v13, 0
 ; RV32-NEXT: lui a5, 1044480
 ; RV32-NEXT: vmerge.vxm v13, v13, a5, v0
@@ -514,9 +513,8 @@ define <2 x i64> @vp_bswap_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT: vor.vv v9, v10, v9
 ; RV32-NEXT: vsrl.vi v10, v8, 8
 ; RV32-NEXT: li a4, 5
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v0, a4
 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.s.x v0, a4
 ; RV32-NEXT: vmv.v.i v11, 0
 ; RV32-NEXT: lui a4, 1044480
 ; RV32-NEXT: vmerge.vxm v11, v11, a4, v0
@@ -598,9 +596,8 @@ define <4 x i64> @vp_bswap_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT: vand.vx v14, v14, a4, v0.t
 ; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
 ; RV32-NEXT: li a5, 85
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v0, a5
 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.s.x v0, a5
 ; RV32-NEXT: vmv.v.i v18, 0
 ; RV32-NEXT: lui a5, 1044480
 ; RV32-NEXT: vmerge.vxm v18, v18, a5, v0
@@ -672,9 +669,8 @@ define <4 x i64> @vp_bswap_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT: vor.vv v10, v12, v10
 ; RV32-NEXT: vsrl.vi v12, v8, 8
 ; RV32-NEXT: li a4, 85
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v0, a4
 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.s.x v0, a4
 ; RV32-NEXT: vmv.v.i v14, 0
 ; RV32-NEXT: lui a4, 1044480
 ; RV32-NEXT: vmerge.vxm v14, v14, a4, v0
@@ -757,9 +753,8 @@ define <8 x i64> @vp_bswap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT: vsrl.vi v28, v8, 8, v0.t
 ; RV32-NEXT: lui a5, 5
 ; RV32-NEXT: addi a5, a5, 1365
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a5
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vmv.s.x v0, a5
 ; RV32-NEXT: vmv.v.i v20, 0
 ; RV32-NEXT: lui a5, 1044480
 ; RV32-NEXT: vmerge.vxm v20, v20, a5, v0
@@ -832,9 +827,8 @@ define <8 x i64> @vp_bswap_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT: vsrl.vi v20, v8, 8
 ; RV32-NEXT: lui a4, 5
 ; RV32-NEXT: addi a4, a4, 1365
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a4
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vmv.s.x v0, a4
 ; RV32-NEXT: vmv.v.i v16, 0
 ; RV32-NEXT: lui a4, 1044480
 ; RV32-NEXT: vmerge.vxm v16, v16, a4, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 30ebcf7..3418f86 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -36,9 +36,8 @@ define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x,
 ; LMULMAX1-LABEL: hang_when_merging_stores_after_legalization:
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: li a0, 2
-; LMULMAX1-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; LMULMAX1-NEXT: vmv.s.x v0, a0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; LMULMAX1-NEXT: vmv.s.x v0, a0
 ; LMULMAX1-NEXT: vrgather.vi v12, v8, 0
 ; LMULMAX1-NEXT: vrgather.vi v12, v9, 3, v0.t
 ; LMULMAX1-NEXT: vsetivli zero, 3, e32, m1, tu, ma
@@ -152,9 +151,8 @@ define void @buildvec_merge0_v4f32(<4 x float>* %x, float %f) {
 ; CHECK-LABEL: buildvec_merge0_v4f32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a1, 6
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a1
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a1
 ; CHECK-NEXT: vfmv.v.f v8, fa0
 ; CHECK-NEXT: lui a1, 262144
 ; CHECK-NEXT: vmerge.vxm v8, v8, a1, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
index 4d7d08e..944dea2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -6,9 +6,8 @@ define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) {
 ; CHECK-LABEL: shuffle_v4f16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a0, 11
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT: ret
   %s = shufflevector <4 x half> %x, <4 x half> %y, <4 x i32>
@@ -19,9 +18,8 @@ define <8 x float> @shuffle_v8f32(<8 x float> %x, <8 x float> %y) {
 ; CHECK-LABEL: shuffle_v8f32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a0, 236
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT: ret
   %s = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32>
@@ -34,9 +32,8 @@ define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) {
 ; RV32-NEXT: li a0, 9
 ; RV32-NEXT: lui a1, %hi(.LCPI2_0)
 ; RV32-NEXT: fld fa5, %lo(.LCPI2_0)(a1)
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v0, a0
 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vmv.s.x v0, a0
 ; RV32-NEXT: vfmerge.vfm v8, v8, fa5, v0
 ; RV32-NEXT: ret
 ;
@@ -45,9 +42,8 @@ define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) {
 ; RV64-NEXT: lui a0, %hi(.LCPI2_0)
 ; RV64-NEXT: fld fa5, %lo(.LCPI2_0)(a0)
 ; RV64-NEXT: li a0, 9
-; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vmv.s.x v0, a0
 ; RV64-NEXT: vfmerge.vfm v8, v8, fa5, v0
 ; RV64-NEXT: ret
   %s = shufflevector <4 x double> , <4 x double> %x, <4 x i32>
@@ -60,9 +56,8 @@ define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
 ; RV32-NEXT: li a0, 6
 ; RV32-NEXT: lui a1, %hi(.LCPI3_0)
 ; RV32-NEXT: fld fa5, %lo(.LCPI3_0)(a1)
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v0, a0
 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vmv.s.x v0, a0
 ; RV32-NEXT: vfmerge.vfm v8, v8, fa5, v0
 ; RV32-NEXT: ret
 ;
@@ -71,9 +66,8 @@ define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
 ; RV64-NEXT: lui a0, %hi(.LCPI3_0)
 ; RV64-NEXT: fld fa5, %lo(.LCPI3_0)(a0)
 ; RV64-NEXT: li a0, 6
-; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vmv.s.x v0, a0
 ; RV64-NEXT: vfmerge.vfm v8, v8, fa5, v0
 ; RV64-NEXT: ret
   %s = shufflevector <4 x double> %x, <4 x double> , <4 x i32>
@@ -162,9 +156,8 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
 ; RV32-LABEL: vrgather_shuffle_xv_v4f64:
 ; RV32: # %bb.0:
 ; RV32-NEXT: li a0, 12
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v0, a0
 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vmv.s.x v0, a0
 ; RV32-NEXT: lui a0, %hi(.LCPI7_0)
 ; RV32-NEXT: addi a0, a0, %lo(.LCPI7_0)
 ; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -177,13 +170,12 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
 ;
 ; RV64-LABEL: vrgather_shuffle_xv_v4f64:
 ; RV64: # %bb.0:
-; RV64-NEXT: li a0, 12
-; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
 ; RV64-NEXT: lui a0, %hi(.LCPI7_0)
 ; RV64-NEXT: addi a0, a0, %lo(.LCPI7_0)
 ; RV64-NEXT: vlse64.v v10, (a0), zero
+; RV64-NEXT: li a0, 12
+; RV64-NEXT: vmv.s.x v0, a0
 ; RV64-NEXT: vid.v v12
 ; RV64-NEXT: vrsub.vi v12, v12, 4
 ; RV64-NEXT: vrgather.vv v10, v8, v12, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 5f07294..3c9a052 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -6,9 +6,8 @@ define <4 x i16> @shuffle_v4i16(<4 x i16> %x, <4 x i16> %y) {
 ; CHECK-LABEL: shuffle_v4i16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a0, 11
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT: ret
   %s = shufflevector <4 x i16> %x, <4 x i16> %y, <4 x i32>
@@ -19,9 +18,8 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; CHECK-LABEL: shuffle_v8i32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a0, 203
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT: ret
   %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32>
@@ -32,9 +30,8 @@ define <4 x i16> @shuffle_xv_v4i16(<4 x i16> %x) {
 ; CHECK-LABEL: shuffle_xv_v4i16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a0, 9
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vmerge.vim v8, v8, 5, v0
 ; CHECK-NEXT: ret
   %s = shufflevector <4 x i16> , <4 x i16> %x, <4 x i32>
@@ -45,9 +42,8 @@ define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) {
 ; CHECK-LABEL: shuffle_vx_v4i16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a0, 6
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vmerge.vim v8, v8, 5, v0
 ; CHECK-NEXT: ret
   %s = shufflevector <4 x i16> %x, <4 x i16> , <4 x i32>
@@ -103,9 +99,8 @@ define <4 x i16> @vrgather_shuffle_xv_v4i16(<4 x i16> %x) {
 ; CHECK-LABEL: vrgather_shuffle_xv_v4i16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a0, 12
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vid.v v9
 ; CHECK-NEXT: vrsub.vi v10, v9, 4
 ; CHECK-NEXT: vmv.v.i v9, 5
@@ -238,13 +233,12 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) {
 ;
 ; RV64-LABEL: vrgather_shuffle_xv_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: li a0, 113
-; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV64-NEXT: lui a0, %hi(.LCPI12_0)
 ; RV64-NEXT: addi a0, a0, %lo(.LCPI12_0)
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV64-NEXT: vle64.v v16, (a0)
+; RV64-NEXT: li a0, 113
+; RV64-NEXT: vmv.s.x v0, a0
 ; RV64-NEXT: vmv.v.i v12, -1
 ; RV64-NEXT: vrgather.vv v12, v8, v16, v0.t
 ; RV64-NEXT: vmv.v.v v8, v12
@@ -273,13 +267,12 @@ define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) {
 ;
 ; RV64-LABEL: vrgather_shuffle_vx_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: li a0, 115
-; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV64-NEXT: lui a0, %hi(.LCPI13_0)
 ; RV64-NEXT: addi a0, a0, %lo(.LCPI13_0)
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV64-NEXT: vle64.v v16, (a0)
+; RV64-NEXT: li a0, 115
+; RV64-NEXT: vmv.s.x v0, a0
 ; RV64-NEXT: vmv.v.i v12, 5
 ; RV64-NEXT: vrgather.vv v12, v8, v16, v0.t
 ; RV64-NEXT: vmv.v.v v8, v12
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
index 65055b0..baf5f9c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
@@ -230,9 +230,8 @@ define void @splat_v4i64(ptr %x, i64 %y) {
 ; LMULMAX1-RV32-LABEL: splat_v4i64:
 ; LMULMAX1-RV32: # %bb.0:
 ; LMULMAX1-RV32-NEXT: li a3, 5
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.s.x v0, a3
 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.s.x v0, a3
 ; LMULMAX1-RV32-NEXT: vmv.v.x v8, a2
 ; LMULMAX1-RV32-NEXT: vmerge.vxm v8, v8, a1, v0
 ; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-- 
2.7.4