From d8562e27e05b90d8957d20444c724293ddf1ba0c Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 13 Jun 2023 16:19:02 -0700 Subject: [PATCH] [RISCV] Canonicalize towards vmerge w/passthrough representation This is the first patch in a series to change how we represent tail agnostic, tail undefined, and tail undisturbed operations. In current code, we tend to use an unsuffixed pseudo for undefined (despite calling it TA most places in code), and the _TU form for both agnostic and undisturbed (via the policy operand). The key observation behind this patch is that we can represent tail undefined via a pseudo with a passthrough operand if that operand is IMPLICIT_DEF (aka undef). We already have a few instances of this in tree - see vmv.s.x and vslide* - but we can do this more universally. Once complete, we will be able to delete roughly ~1/3 of our vector pseudo classes. A bit more information on the overall goal can be found in this discourse post: https://discourse.llvm.org/t/riscv-transition-in-vector-pseudo-structure-policy-variants/71295. This patch doesn't actually remove the legacy unsuffixed pseudo as there's still some path from intrinsic lowering which uses it. (I have not yet located it.) This also means we don't have to modify any of the lookup tables which makes the migration simpler. We can defer deleting the tables and pseudos until one final change once all the instructions have been migrated. There are a couple of regressions in the tests. At first, these concerned me, but it turns out that all of them are differences in expansion of a single source level instruction. I think we can safely ignore this for the moment. I did explore changing the handling of IMPLICIT_DEF in ScheduleDAG, but that causes an absolutely *massive* test diff with minimal profit. I really don't think it's worth doing. Differential Revision: https://reviews.llvm.org/D152380 --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 19 ++- llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td | 18 ++- llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 21 ++- llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll | 50 +++--- llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll | 80 ++++----- .../RISCV/rvv/named-vector-shuffle-reverse.ll | 62 ++++--- llvm/test/CodeGen/RISCV/rvv/vector-splice.ll | 180 ++++++++++++--------- 9 files changed, 244 insertions(+), 198 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 6159571..abc03ee 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3238,6 +3238,11 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) { return true; } +static bool isImplicitDef(SDValue V) { + return V.isMachineOpcode() && + V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF; +} + // Try to fold away VMERGE_VVM instructions. We handle these cases: // -Masked TU VMERGE_VVM combined with an unmasked TA instruction instruction // folds to a masked TU instruction. VMERGE_VVM must have have merge operand @@ -3247,9 +3252,14 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) { // -Unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds to // masked TU instruction. Both instructions must have the same merge operand. // VMERGE_VVM must have have merge operand same as false operand. 
+// Note: The VMERGE_VVM forms above (TA, and TU) refer to the policy implied, +// not the pseudo name. That is, a TA VMERGE_VVM can be either the _TU pseudo +// form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo +// form. bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N, bool IsTA) { unsigned Offset = IsTA ? 0 : 1; - uint64_t Policy = IsTA ? RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0; + uint64_t Policy = (IsTA || isImplicitDef(N->getOperand(0))) ? + RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0; SDValue False = N->getOperand(0 + Offset); SDValue True = N->getOperand(1 + Offset); @@ -3286,7 +3296,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N, bool IsTA) { // The vmerge instruction must be TU. // FIXME: This could be relaxed, but we need to handle the policy for the // resulting op correctly. - if (IsTA) + if (IsTA || isImplicitDef(N->getOperand(0))) return false; SDValue MergeOpTrue = True->getOperand(0); // Both the vmerge instruction and the True instruction must have the same @@ -3298,7 +3308,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N, bool IsTA) { if (IsMasked) { assert(HasTiedDest && "Expected tied dest"); // The vmerge instruction must be TU. - if (IsTA) + if (IsTA || isImplicitDef(N->getOperand(0))) return false; // The vmerge instruction must have an all 1s mask since we're going to keep // the mask from the True instruction. @@ -3465,7 +3475,8 @@ bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() { unsigned Opc = N->getMachineOpcode(); // The following optimizations require that the merge operand of N is same // as the false operand of N. - if ((IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1)) || + if ((IsVMergeTU(Opc) && (N->getOperand(0) == N->getOperand(1) || + isImplicitDef(N->getOperand(0)))) || IsVMergeTA(Opc)) MadeChange |= performCombineVMergeAndVOps(N, IsVMergeTA(Opc)); if (IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1)) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 053b33e..2beb909 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -853,18 +853,21 @@ foreach vti = AllIntegerVectors in { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(vti.Vector (vselect (vti.Mask V0), vti.RegClass:$rs1, vti.RegClass:$rs2)), - (!cast("PseudoVMERGE_VVM_"#vti.LMul.MX) + (!cast("PseudoVMERGE_VVM_"#vti.LMul.MX#"_TU") + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>; def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat XLenVT:$rs1), vti.RegClass:$rs2)), - (!cast("PseudoVMERGE_VXM_"#vti.LMul.MX) + (!cast("PseudoVMERGE_VXM_"#vti.LMul.MX#"_TU") + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, GPR:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>; def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat_simm5 simm5:$rs1), vti.RegClass:$rs2)), - (!cast("PseudoVMERGE_VIM_"#vti.LMul.MX) + (!cast("PseudoVMERGE_VIM_"#vti.LMul.MX#"_TU") + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>; } } @@ -1066,14 +1069,16 @@ foreach fvti = AllFloatVectors in { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(fvti.Vector (vselect (fvti.Mask V0), fvti.RegClass:$rs1, fvti.RegClass:$rs2)), - (!cast("PseudoVMERGE_VVM_"#fvti.LMul.MX) + (!cast("PseudoVMERGE_VVM_"#fvti.LMul.MX#"_TU") + (fvti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs2, fvti.RegClass:$rs1, 
(fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; def : Pat<(fvti.Vector (vselect (fvti.Mask V0), (SplatFPOp fvti.ScalarRegClass:$rs1), fvti.RegClass:$rs2)), - (!cast("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) + (!cast("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX#"_TU") + (fvti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs2, (fvti.Scalar fvti.ScalarRegClass:$rs1), (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; @@ -1081,7 +1086,8 @@ foreach fvti = AllFloatVectors in { def : Pat<(fvti.Vector (vselect (fvti.Mask V0), (SplatFPOp (fvti.Scalar fpimm0)), fvti.RegClass:$rs2)), - (!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX) + (!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX#"_TU") + (fvti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 22c9738..abf1290 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -1732,7 +1732,8 @@ foreach vti = AllIntegerVectors in { vti.RegClass:$rs1, vti.RegClass:$rs2, VLOpFrag)), - (!cast("PseudoVMERGE_VVM_"#vti.LMul.MX) + (!cast("PseudoVMERGE_VVM_"#vti.LMul.MX#"_TU") + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; @@ -1740,14 +1741,16 @@ foreach vti = AllIntegerVectors in { (SplatPat XLenVT:$rs1), vti.RegClass:$rs2, VLOpFrag)), - (!cast("PseudoVMERGE_VXM_"#vti.LMul.MX) + (!cast("PseudoVMERGE_VXM_"#vti.LMul.MX#"_TU") + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, GPR:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0), (SplatPat_simm5 simm5:$rs1), vti.RegClass:$rs2, VLOpFrag)), - (!cast("PseudoVMERGE_VIM_"#vti.LMul.MX) + (!cast("PseudoVMERGE_VIM_"#vti.LMul.MX#"_TU") + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0), @@ -1951,7 +1954,8 @@ foreach fvti = AllFloatVectors in { fvti.RegClass:$rs1, fvti.RegClass:$rs2, VLOpFrag)), - (!cast("PseudoVMERGE_VVM_"#fvti.LMul.MX) + (!cast("PseudoVMERGE_VVM_"#fvti.LMul.MX#"_TU") + (fvti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; @@ -1959,7 +1963,8 @@ foreach fvti = AllFloatVectors in { (SplatFPOp fvti.ScalarRegClass:$rs1), fvti.RegClass:$rs2, VLOpFrag)), - (!cast("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) + (!cast("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX#"_TU") + (fvti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs2, (fvti.Scalar fvti.ScalarRegClass:$rs1), (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; @@ -1968,7 +1973,8 @@ foreach fvti = AllFloatVectors in { (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))), fvti.RegClass:$rs2, VLOpFrag)), - (!cast("PseudoVMERGE_VXM_"#fvti.LMul.MX) + (!cast("PseudoVMERGE_VXM_"#fvti.LMul.MX#"_TU") + (fvti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; @@ -1977,7 +1983,8 @@ foreach fvti = AllFloatVectors in { (SplatFPOp (fvti.Scalar fpimm0)), fvti.RegClass:$rs2, VLOpFrag)), - (!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX) + (!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX#"_TU") + (fvti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0), diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll index ba84867..5db18d2 100644 
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll @@ -154,9 +154,9 @@ define i1 @extractelt_nxv128i1(* %x, i64 %idx) nounwind { ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vmerge.vim v24, v16, 1, v0 ; RV32-NEXT: vs8r.v v24, (a3) -; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vmerge.vim v8, v16, 1, v0 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: vs8r.v v8, (a2) ; RV32-NEXT: lbu a0, 0(a1) ; RV32-NEXT: addi sp, s0, -80 @@ -194,9 +194,9 @@ define i1 @extractelt_nxv128i1(* %x, i64 %idx) nounwind { ; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: vmerge.vim v24, v16, 1, v0 ; RV64-NEXT: vs8r.v v24, (a3) -; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: vmv1r.v v0, v8 ; RV64-NEXT: vmerge.vim v8, v16, 1, v0 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: vs8r.v v8, (a2) ; RV64-NEXT: lbu a0, 0(a1) ; RV64-NEXT: addi sp, s0, -80 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll index 6cb96c9..b823814 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll @@ -20,14 +20,14 @@ define <1 x i1> @insertelt_idx_v1i1(<1 x i1> %x, i1 %elt, i32 zeroext %idx) noun ; CHECK-LABEL: insertelt_idx_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <1 x i1> %x, i1 %elt, i32 %idx @@ -38,11 +38,11 @@ define <2 x i1> @insertelt_v2i1(<2 x i1> %x, i1 %elt) nounwind { ; CHECK-LABEL: insertelt_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vslideup.vi v9, v8, 1 -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <2 x i1> %x, i1 %elt, i64 1 @@ -53,14 +53,14 @@ define <2 x i1> @insertelt_idx_v2i1(<2 x i1> %x, i1 %elt, i32 zeroext %idx) noun ; CHECK-LABEL: insertelt_idx_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <2 x i1> %x, i1 %elt, i32 %idx @@ -71,13 +71,13 @@ define <8 x i1> @insertelt_v8i1(<8 x i1> %x, i1 %elt) nounwind { ; CHECK-LABEL: insertelt_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; 
CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <8 x i1> %x, i1 %elt, i64 1 @@ -88,14 +88,14 @@ define <8 x i1> @insertelt_idx_v8i1(<8 x i1> %x, i1 %elt, i32 zeroext %idx) noun ; CHECK-LABEL: insertelt_idx_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <8 x i1> %x, i1 %elt, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll index e5f39a8..f4c9d26 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll @@ -438,14 +438,14 @@ define @insert_nxv4i1_nxv1i1_2( %v, @insertelt_nxv1i1( %x, i1 %elt) { ; CHECK-LABEL: insertelt_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 3, e8, mf8, tu, ma -; CHECK-NEXT: vslideup.vi v9, v8, 2 +; CHECK-NEXT: vslideup.vi v8, v9, 2 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 2 @@ -23,14 +23,14 @@ define @insertelt_idx_nxv1i1( %x, i1 %elt, i6 ; CHECK-LABEL: insertelt_idx_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 %idx @@ -41,13 +41,13 @@ define @insertelt_nxv2i1( %x, i1 %elt) { ; CHECK-LABEL: insertelt_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, tu, ma -; CHECK-NEXT: vslideup.vi v9, v8, 2 +; CHECK-NEXT: vslideup.vi v8, v9, 2 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 
; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 2 @@ -58,14 +58,14 @@ define @insertelt_idx_nxv2i1( %x, i1 %elt, i6 ; CHECK-LABEL: insertelt_idx_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 %idx @@ -76,13 +76,13 @@ define @insertelt_nxv4i1( %x, i1 %elt) { ; CHECK-LABEL: insertelt_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v9, v8, 2 +; CHECK-NEXT: vslideup.vi v8, v9, 2 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 2 @@ -93,14 +93,14 @@ define @insertelt_idx_nxv4i1( %x, i1 %elt, i6 ; CHECK-LABEL: insertelt_idx_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 %idx @@ -111,13 +111,13 @@ define @insertelt_nxv8i1( %x, i1 %elt) { ; CHECK-LABEL: insertelt_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 3, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vi v9, v8, 2 +; CHECK-NEXT: vslideup.vi v8, v9, 2 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 2 @@ -128,14 +128,14 @@ define @insertelt_idx_nxv8i1( %x, i1 %elt, i6 ; CHECK-LABEL: insertelt_idx_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: 
ret %y = insertelement %x, i1 %elt, i64 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll index 66fde7a..11dba17 100644 --- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll @@ -497,18 +497,18 @@ define @reverse_nxv32i1( %a) { define @reverse_nxv64i1( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv64i1: ; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 +; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v8 -; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0 -; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v16, v16, 1, v0 -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v16, v8 -; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v20, v8 +; RV32-BITS-UNKNOWN-NEXT: vid.v v16 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v8, v16 +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v12, v16 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v24, 1 ; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 @@ -531,18 +531,17 @@ define @reverse_nxv64i1( %a) { ; ; RV32-BITS-512-LABEL: reverse_nxv64i1: ; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV32-BITS-512-NEXT: vmv.v.i v8, 0 +; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: slli a0, a0, 2 ; RV32-BITS-512-NEXT: addi a0, a0, -1 ; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; RV32-BITS-512-NEXT: vid.v v8 -; RV32-BITS-512-NEXT: vrsub.vx v8, v8, a0 -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV32-BITS-512-NEXT: vmv.v.i v16, 0 -; RV32-BITS-512-NEXT: vmerge.vim v16, v16, 1, v0 -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-512-NEXT: vrgather.vv v28, v16, v8 -; RV32-BITS-512-NEXT: vrgather.vv v24, v20, v8 +; RV32-BITS-512-NEXT: vid.v v16 +; RV32-BITS-512-NEXT: vrsub.vx v16, v16, a0 +; RV32-BITS-512-NEXT: vrgather.vv v28, v8, v16 +; RV32-BITS-512-NEXT: vrgather.vv v24, v12, v16 ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-512-NEXT: vand.vi v8, v24, 1 ; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 @@ -550,18 +549,18 @@ define @reverse_nxv64i1( %a) { ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv64i1: ; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 +; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v8 -; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0 -; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v16, v16, 1, v0 -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; 
RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v16, v8 -; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v20, v8 +; RV64-BITS-UNKNOWN-NEXT: vid.v v16 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v8, v16 +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v12, v16 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v24, 1 ; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 @@ -584,18 +583,17 @@ define @reverse_nxv64i1( %a) { ; ; RV64-BITS-512-LABEL: reverse_nxv64i1: ; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV64-BITS-512-NEXT: vmv.v.i v8, 0 +; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 ; RV64-BITS-512-NEXT: addi a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; RV64-BITS-512-NEXT: vid.v v8 -; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0 -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-BITS-512-NEXT: vmv.v.i v16, 0 -; RV64-BITS-512-NEXT: vmerge.vim v16, v16, 1, v0 -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-BITS-512-NEXT: vrgather.vv v28, v16, v8 -; RV64-BITS-512-NEXT: vrgather.vv v24, v20, v8 +; RV64-BITS-512-NEXT: vid.v v16 +; RV64-BITS-512-NEXT: vrsub.vx v16, v16, a0 +; RV64-BITS-512-NEXT: vrgather.vv v28, v8, v16 +; RV64-BITS-512-NEXT: vrgather.vv v24, v12, v16 ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-BITS-512-NEXT: vand.vi v8, v24, 1 ; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll index 67431f7..d0272c1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll @@ -9,17 +9,19 @@ declare @llvm.experimental.vector.splice.nxv1i1( @splice_nxv1i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv1i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vslidedown.vx v10, v10, a0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vi v10, v8, 1 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslidedown.vx v9, v9, a0 +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv1i1( %a, %b, i32 -1) @@ -29,19 +31,21 @@ define @splice_nxv1i1_offset_negone( %a, @splice_nxv1i1_offset_max( %a, %b) #0 { ; CHECK-LABEL: splice_nxv1i1_offset_max: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 1 +; CHECK-NEXT: vslidedown.vi v9, 
v9, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vx v10, v8, a0 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslideup.vx v9, v8, a0 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv1i1( %a, %b, i32 1) @@ -53,17 +57,19 @@ declare @llvm.experimental.vector.splice.nxv2i1( @splice_nxv2i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv2i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vslidedown.vx v10, v10, a0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vi v10, v8, 1 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslidedown.vx v9, v9, a0 +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv2i1( %a, %b, i32 -1) @@ -73,19 +79,21 @@ define @splice_nxv2i1_offset_negone( %a, @splice_nxv2i1_offset_max( %a, %b) #0 { ; CHECK-LABEL: splice_nxv2i1_offset_max: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -3 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 3 +; CHECK-NEXT: vslidedown.vi v9, v9, 3 ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vx v10, v8, a0 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslideup.vx v9, v8, a0 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv2i1( %a, %b, i32 3) @@ -97,17 +105,19 @@ declare @llvm.experimental.vector.splice.nxv4i1( @splice_nxv4i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv4i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vslidedown.vx v10, v10, a0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vi v10, v8, 1 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslidedown.vx v9, v9, a0 +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv4i1( %a, %b, i32 -1) @@ -117,19 +127,21 @@ define @splice_nxv4i1_offset_negone( %a, @splice_nxv4i1_offset_max( %a, %b) 
#0 { ; CHECK-LABEL: splice_nxv4i1_offset_max: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -7 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 7 +; CHECK-NEXT: vslidedown.vi v9, v9, 7 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vx v10, v8, a0 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslideup.vx v9, v8, a0 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv4i1( %a, %b, i32 7) @@ -141,16 +153,18 @@ declare @llvm.experimental.vector.splice.nxv8i1( @splice_nxv8i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv8i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vslidedown.vx v10, v10, a0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vi v10, v8, 1 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslidedown.vx v9, v9, a0 +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv8i1( %a, %b, i32 -1) @@ -160,18 +174,20 @@ define @splice_nxv8i1_offset_negone( %a, @splice_nxv8i1_offset_max( %a, %b) #0 { ; CHECK-LABEL: splice_nxv8i1_offset_max: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: addi a0, a0, -15 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 15 +; CHECK-NEXT: vslidedown.vi v9, v9, 15 ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vx v10, v8, a0 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslideup.vx v9, v8, a0 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv8i1( %a, %b, i32 15) @@ -183,19 +199,21 @@ declare @llvm.experimental.vector.splice.nxv16i1( @splice_nxv16i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv16i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, m2, 
ta, ma -; CHECK-NEXT: vslidedown.vx v12, v12, a0 +; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vslideup.vi v12, v8, 1 -; CHECK-NEXT: vand.vi v8, v12, 1 +; CHECK-NEXT: vslideup.vi v8, v12, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv16i1( %a, %b, i32 -1) @@ -205,19 +223,21 @@ define @splice_nxv16i1_offset_negone( %a, < define @splice_nxv16i1_offset_max( %a, %b) #0 { ; CHECK-LABEL: splice_nxv16i1_offset_max: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -31 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v12, v12, 31 +; CHECK-NEXT: vslidedown.vi v8, v8, 31 ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vslideup.vx v12, v8, a0 -; CHECK-NEXT: vand.vi v8, v12, 1 +; CHECK-NEXT: vslideup.vx v8, v12, a0 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv16i1( %a, %b, i32 31) @@ -229,19 +249,21 @@ declare @llvm.experimental.vector.splice.nxv32i1( @splice_nxv32i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv32i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v16, v12, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v16, v16, a0 +; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 -; CHECK-NEXT: vslideup.vi v16, v8, 1 -; CHECK-NEXT: vand.vi v8, v16, 1 +; CHECK-NEXT: vslideup.vi v8, v16, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv32i1( %a, %b, i32 -1) @@ -276,19 +298,21 @@ declare @llvm.experimental.vector.splice.nxv64i1( @splice_nxv64i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv64i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v24, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v24, v24, a0 +; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: vslideup.vi v24, v8, 1 -; CHECK-NEXT: vand.vi v8, v24, 1 +; CHECK-NEXT: vslideup.vi v8, v16, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret 
%res = call <vscale x 64 x i1> @llvm.experimental.vector.splice.nxv64i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b, i32 -1) -- 2.7.4
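For readers skimming the diff, a minimal C++ sketch of the rule the patch adds to the vmerge peephole: a vmerge pseudo whose passthrough (merge) operand is IMPLICIT_DEF carries no tail-undisturbed obligation, so the fold may treat it exactly like the legacy unsuffixed (TA) form. `isImplicitDef` is copied from the patch; `treatAsTailAgnostic` is a hypothetical wrapper name used only for illustration, and the includes assume an LLVM source tree on the include path.

```cpp
// Sketch only: mirrors the decision made in performCombineVMergeAndVOps after
// this change. isImplicitDef is taken from the patch; treatAsTailAgnostic is a
// hypothetical helper, not a function in the tree.
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

static bool isImplicitDef(SDValue V) {
  return V.isMachineOpcode() &&
         V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
}

// A _TU vmerge whose passthrough is IMPLICIT_DEF has an undefined tail, so the
// combine may emit the tail-agnostic policy for it, just as it already does
// for the unsuffixed (TA) pseudo.
static bool treatAsTailAgnostic(bool IsTA, SDValue Passthru) {
  return IsTA || isImplicitDef(Passthru);
}
```

Keying the decision off an IMPLICIT_DEF passthrough is what lets the _TU pseudo subsume the unsuffixed form, which in turn is what allows the legacy pseudos and their lookup tables to be deleted in a later change, as the commit message describes.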