From af773a18181dc1a1e3846f518b2d44f2abbbdf87 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 18 Jan 2022 14:13:13 +0000 Subject: [PATCH] [RISCV][VP] Lower VP_MERGE to RVV instructions This patch adds lowering of the llvm.vp.merge.* intrinsic (ISD::VP_MERGE) to RVV vmerge/vfmerge instructions. It introduces a special pseudo form of vmerge which allows a tied merge operand, allowing us to specify the tail elements as being equal to the "on false" operand, using a tied-def constraint and a "tail undisturbed" policy. While this strategy allows us to often lower the intrinsic to just one instruction, it may be less efficient in fixed-vector types as the number of tail elements may extend far beyond the length of the fixed vector. Another strategy could be to use a vmerge/vfmerge instruction with an AVL equal to the length of the vector type, and manipulate the condition operand such that mask elements greater than the operation's EVL are false. I've also observed inefficient codegen in which our 'VF' patterns don't match raw floating-point SPLAT_VECTORs, which occur in scalable-vector code. 
Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D117561 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 +- llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 + llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 75 +- llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 64 +- .../CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll | 953 +++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 1280 ++++++++++++++++++++ 6 files changed, 2372 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 5073fe6..7d224e3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -521,12 +521,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN, - ISD::VP_SELECT}; + ISD::VP_MERGE, ISD::VP_SELECT}; static const unsigned FloatingPointVPOps[] = { ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, - ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SELECT}; + ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE, + ISD::VP_SELECT}; if (!Subtarget.is64Bit()) { // We must custom-lower certain vXi64 operations on RV32 due to the vector @@ -3441,6 +3442,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerSET_ROUNDING(Op, DAG); case ISD::VP_SELECT: return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL); + case ISD::VP_MERGE: + return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL); case ISD::VP_ADD: return lowerVPOp(Op, DAG, RISCVISD::ADD_VL); case ISD::VP_SUB: @@ -10087,6 +10090,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned 
Opcode) const { NODE_NAME_CASE(VWADDU_VL) NODE_NAME_CASE(SETCC_VL) NODE_NAME_CASE(VSELECT_VL) + NODE_NAME_CASE(VP_MERGE_VL) NODE_NAME_CASE(VMAND_VL) NODE_NAME_CASE(VMOR_VL) NODE_NAME_CASE(VMXOR_VL) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 23857f9..58b7ec8 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -253,6 +253,10 @@ enum NodeType : unsigned { // Vector select with an additional VL operand. This operation is unmasked. VSELECT_VL, + // Vector select with operand #2 (the value when the condition is false) tied + // to the destination and an additional VL operand. This operation is + // unmasked. + VP_MERGE_VL, // Mask binary operators. VMAND_VL, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index a4e92c8..798f848 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -579,10 +579,11 @@ class PseudoToVInst { !subst("_B64", "", !subst("_MASK", "", !subst("_TIED", "", + !subst("_TU", "", !subst("F16", "F", !subst("F32", "F", !subst("F64", "F", - !subst("Pseudo", "", PseudoInst)))))))))))))))))))); + !subst("Pseudo", "", PseudoInst))))))))))))))))))))); } // The destination vector register group for a masked vector instruction cannot @@ -928,6 +929,9 @@ class VPseudoBinaryNoMask(PseudoToVInst.VInst); } +// Special version of VPseudoBinaryNoMask where we pretend the first source is +// tied to the destination. +// This allows maskedoff and rs2 to be the same register. 
class VPseudoTiedBinaryNoMask : @@ -1079,6 +1083,30 @@ class VPseudoBinaryCarryIn : + Pseudo<(outs RetClass:$rd), + !if(CarryIn, + (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl, + ixlenimm:$sew), + (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew)), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let HasVecPolicyOp = 0; + let BaseInstr = !cast(PseudoToVInst.VInst); + let VLMul = MInfo.value; +} + class VPseudoTernaryNoMask; } +multiclass VPseudoTiedBinaryV_VM { + foreach m = MxList in + def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU" : + VPseudoTiedBinaryCarryIn.R, m.vrclass)), + m.vrclass, m.vrclass, m, CarryIn, Constraint>; +} + multiclass VPseudoBinaryV_XM { foreach m = MxList in @@ -1751,13 +1789,29 @@ multiclass VPseudoBinaryV_XM; } +multiclass VPseudoTiedBinaryV_XM { + foreach m = MxList in + def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU": + VPseudoTiedBinaryCarryIn.R, m.vrclass)), + m.vrclass, GPR, m, CarryIn, Constraint>; +} + multiclass VPseudoVMRG_FM { foreach f = FPList in - foreach m = f.MxList in + foreach m = f.MxList in { def "_V" # f.FX # "M_" # m.MX : VPseudoBinaryCarryIn.R, m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">, Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>; + // Tied version to allow codegen control over the tail elements + def "_V" # f.FX # "M_" # m.MX # "_TU": + VPseudoTiedBinaryCarryIn.R, + m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">, + Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>; + } } multiclass VPseudoBinaryV_IM; } +multiclass VPseudoTiedBinaryV_IM { + foreach m = MxList in + def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU": + VPseudoTiedBinaryCarryIn.R, m.vrclass)), + m.vrclass, simm5, m, CarryIn, Constraint>; +} + multiclass VPseudoUnaryVMV_V_X_I { foreach m = 
MxList in { let VLMul = m.value in { @@ -2104,6 +2168,13 @@ multiclass VPseudoVMRG_VM_XM_IM { Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; defm "" : VPseudoBinaryV_IM, Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>; + // Tied versions to allow codegen control over the tail elements + defm "" : VPseudoTiedBinaryV_VM, + Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_XM, + Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_IM, + Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>; } multiclass VPseudoVCALU_VM_XM_IM { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 9e47cb8..5cff16c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -177,14 +177,13 @@ def riscv_vrgatherei16_vv_vl : SDNode<"RISCVISD::VRGATHEREI16_VV_VL", SDTCisSameNumEltsAs<0, 3>, SDTCisVT<4, XLenVT>]>>; -def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL", - SDTypeProfile<1, 4, [SDTCisVec<0>, - SDTCisVec<1>, - SDTCisSameNumEltsAs<0, 1>, - SDTCVecEltisVT<1, i1>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<2, 3>, - SDTCisVT<4, XLenVT>]>>; +def SDT_RISCVSelect_VL : SDTypeProfile<1, 4, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, SDTCVecEltisVT<1, i1>, + SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisVT<4, XLenVT> +]>; + +def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL", SDT_RISCVSelect_VL>; +def riscv_vp_merge_vl : SDNode<"RISCVISD::VP_MERGE_VL", SDT_RISCVSelect_VL>; def SDT_RISCVMaskBinOp_VL : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, @@ -976,6 +975,30 @@ foreach vti = AllIntegerVectors in { VLOpFrag)), (!cast("PseudoVMERGE_VIM_"#vti.LMul.MX) vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + + def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0), + vti.RegClass:$rs1, + vti.RegClass:$rs2, + VLOpFrag)), + 
(!cast("PseudoVMERGE_VVM_"#vti.LMul.MX#"_TU") + vti.RegClass:$rs2, vti.RegClass:$rs2, vti.RegClass:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + + def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0), + (SplatPat XLenVT:$rs1), + vti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVMERGE_VXM_"#vti.LMul.MX#"_TU") + vti.RegClass:$rs2, vti.RegClass:$rs2, GPR:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + + def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0), + (SplatPat_simm5 simm5:$rs1), + vti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVMERGE_VIM_"#vti.LMul.MX#"_TU") + vti.RegClass:$rs2, vti.RegClass:$rs2, simm5:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; } // 12.16. Vector Integer Move Instructions @@ -1223,6 +1246,31 @@ foreach fvti = AllFloatVectors in { (!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX) fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; + def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0), + fvti.RegClass:$rs1, + fvti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVMERGE_VVM_"#fvti.LMul.MX#"_TU") + fvti.RegClass:$rs2, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0), + GPR:$vl, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0), + (SplatFPOp fvti.ScalarRegClass:$rs1), + fvti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX#"_TU") + fvti.RegClass:$rs2, fvti.RegClass:$rs2, + (fvti.Scalar fvti.ScalarRegClass:$rs1), + (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0), + (SplatFPOp (fvti.Scalar fpimm0)), + fvti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX#"_TU") + fvti.RegClass:$rs2, fvti.RegClass:$rs2, 0, (fvti.Mask V0), + GPR:$vl, fvti.Log2SEW)>; + // 14.16. Vector Floating-Point Move Instruction // If we're splatting fpimm0, use vmv.v.x vd, x0. 
def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll new file mode 100644 index 0000000..e0cc13d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -0,0 +1,953 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <2 x i8> @llvm.vp.merge.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32) + +define <2 x i8> @vpmerge_vv_v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vpmerge_vx_v2i8(i8 %a, <2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %a, i32 0 + %va = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vpmerge_vi_v2i8(<2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 
2, i32 0 + %va = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.vp.merge.v4i8(<4 x i1>, <4 x i8>, <4 x i8>, i32) + +define <4 x i8> @vpmerge_vv_v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vpmerge_vx_v4i8(i8 %a, <4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %a, i32 0 + %va = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vpmerge_vi_v4i8(<4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 2, i32 0 + %va = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl) + ret <4 x i8> %v +} + +declare <8 x i8> @llvm.vp.merge.v8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32) + +define <8 x i8> @vpmerge_vv_v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, 
v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vpmerge_vx_v8i8(i8 %a, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %a, i32 0 + %va = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vpmerge_vi_v8i8(<8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 2, i32 0 + %va = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.vp.merge.v16i8(<16 x i1>, <16 x i8>, <16 x i8>, i32) + +define <16 x i8> @vpmerge_vv_v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vpmerge_vx_v16i8(i8 %a, <16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %a, i32 0 + %va = shufflevector 
<16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vpmerge_vi_v16i8(<16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 2, i32 0 + %va = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl) + ret <16 x i8> %v +} + +declare <2 x i16> @llvm.vp.merge.v2i16(<2 x i1>, <2 x i16>, <2 x i16>, i32) + +define <2 x i16> @vpmerge_vv_v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vpmerge_vx_v2i16(i16 %a, <2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %a, i32 0 + %va = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vpmerge_vi_v2i16(<2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> 
poison, i16 2, i32 0 + %va = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.vp.merge.v4i16(<4 x i1>, <4 x i16>, <4 x i16>, i32) + +define <4 x i16> @vpmerge_vv_v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vpmerge_vx_v4i16(i16 %a, <4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %a, i32 0 + %va = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vpmerge_vi_v4i16(<4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 2, i32 0 + %va = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.vp.merge.v8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32) + +define <8 x i16> @vpmerge_vv_v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vpmerge_vx_v8i16(i16 %a, <8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %a, i32 0 + %va = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vpmerge_vi_v8i16(<8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 2, i32 0 + %va = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.vp.merge.v16i16(<16 x i1>, <16 x i16>, <16 x i16>, i32) + +define <16 x i16> @vpmerge_vv_v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vpmerge_vx_v16i16(i16 %a, <16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu +; CHECK-NEXT: 
vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %a, i32 0 + %va = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vpmerge_vi_v16i16(<16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 2, i32 0 + %va = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.vp.merge.v2i32(<2 x i1>, <2 x i32>, <2 x i32>, i32) + +define <2 x i32> @vpmerge_vv_v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vpmerge_vx_v2i32(i32 %a, <2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %a, i32 0 + %va = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vpmerge_vi_v2i32(<2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v2i32: +; CHECK: # %bb.0: 
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 2, i32 0 + %va = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.vp.merge.v4i32(<4 x i1>, <4 x i32>, <4 x i32>, i32) + +define <4 x i32> @vpmerge_vv_v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vpmerge_vx_v4i32(i32 %a, <4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %a, i32 0 + %va = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vpmerge_vi_v4i32(<4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 2, i32 0 + %va = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) + +define <8 x i32> 
@vpmerge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vpmerge_vx_v8i32(i32 %a, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %a, i32 0 + %va = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vpmerge_vi_v8i32(<8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 2, i32 0 + %va = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.vp.merge.v16i32(<16 x i1>, <16 x i32>, <16 x i32>, i32) + +define <16 x i32> @vpmerge_vv_v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vpmerge_vx_v16i32(i32 %a, <16 x i32> %vb, 
<16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %a, i32 0 + %va = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vpmerge_vi_v16i32(<16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 2, i32 0 + %va = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.vp.merge.v2i64(<2 x i1>, <2 x i64>, <2 x i64>, i32) + +define <2 x i64> @vpmerge_vv_v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vpmerge_vx_v2i64(i64 %a, <2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v9, v0 +; RV32-NEXT: addi sp, sp, 16 +; 
RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %a, i32 0 + %va = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vpmerge_vi_v2i64(<2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 2, i32 0 + %va = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.vp.merge.v4i64(<4 x i1>, <4 x i64>, <4 x i64>, i32) + +define <4 x i64> @vpmerge_vv_v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vpmerge_vx_v4i64(i64 %a, <4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v10, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: 
vpmerge_vx_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %a, i32 0 + %va = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vpmerge_vi_v4i64(<4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 2, i32 0 + %va = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.vp.merge.v8i64(<8 x i1>, <8 x i64>, <8 x i64>, i32) + +define <8 x i64> @vpmerge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vpmerge_vx_v8i64(i64 %a, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v12, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: 
vsetvli zero, a1, e64, m4, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %a, i32 0 + %va = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vpmerge_vi_v8i64(<8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 2, i32 0 + %va = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.vp.merge.v16i64(<16 x i1>, <16 x i64>, <16 x i64>, i32) + +define <16 x i64> @vpmerge_vv_v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vpmerge_vx_v16i64(i64 %a, <16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, 
m8, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %a, i32 0 + %va = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vpmerge_vi_v16i64(<16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 2, i32 0 + %va = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl) + ret <16 x i64> %v +} + +declare <2 x half> @llvm.vp.merge.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32) + +define <2 x half> @vpmerge_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %va, <2 x half> %vb, i32 %evl) + ret <2 x half> %v +} + +define <2 x half> @vpmerge_vf_v2f16(half %a, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %a, i32 0 + %va = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %v = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %va, <2 x half> %vb, i32 %evl) + ret <2 x half> %v +} + +declare <4 x half> @llvm.vp.merge.v4f16(<4 x i1>, <4 x half>, <4 x half>, i32) + +define 
<4 x half> @vpmerge_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %va, <4 x half> %vb, i32 %evl) + ret <4 x half> %v +} + +define <4 x half> @vpmerge_vf_v4f16(half %a, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %a, i32 0 + %va = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %v = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %va, <4 x half> %vb, i32 %evl) + ret <4 x half> %v +} + +declare <8 x half> @llvm.vp.merge.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32) + +define <8 x half> @vpmerge_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %va, <8 x half> %vb, i32 %evl) + ret <8 x half> %v +} + +define <8 x half> @vpmerge_vf_v8f16(half %a, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %a, i32 0 + %va = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %v = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %va, <8 x half> %vb, i32 %evl) + ret <8 x half> %v +} + +declare <16 x 
half> @llvm.vp.merge.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32) + +define <16 x half> @vpmerge_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %va, <16 x half> %vb, i32 %evl) + ret <16 x half> %v +} + +define <16 x half> @vpmerge_vf_v16f16(half %a, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %a, i32 0 + %va = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %v = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %va, <16 x half> %vb, i32 %evl) + ret <16 x half> %v +} + +declare <2 x float> @llvm.vp.merge.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32) + +define <2 x float> @vpmerge_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %va, <2 x float> %vb, i32 %evl) + ret <2 x float> %v +} + +define <2 x float> @vpmerge_vf_v2f32(float %a, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %a, i32 0 + %va = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %v = call <2 x 
float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %va, <2 x float> %vb, i32 %evl) + ret <2 x float> %v +} + +declare <4 x float> @llvm.vp.merge.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32) + +define <4 x float> @vpmerge_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %va, <4 x float> %vb, i32 %evl) + ret <4 x float> %v +} + +define <4 x float> @vpmerge_vf_v4f32(float %a, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %a, i32 0 + %va = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %v = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %va, <4 x float> %vb, i32 %evl) + ret <4 x float> %v +} + +declare <8 x float> @llvm.vp.merge.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32) + +define <8 x float> @vpmerge_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %va, <8 x float> %vb, i32 %evl) + ret <8 x float> %v +} + +define <8 x float> @vpmerge_vf_v8f32(float %a, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %a, 
i32 0 + %va = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %v = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %va, <8 x float> %vb, i32 %evl) + ret <8 x float> %v +} + +declare <16 x float> @llvm.vp.merge.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32) + +define <16 x float> @vpmerge_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %va, <16 x float> %vb, i32 %evl) + ret <16 x float> %v +} + +define <16 x float> @vpmerge_vf_v16f32(float %a, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %a, i32 0 + %va = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %v = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %va, <16 x float> %vb, i32 %evl) + ret <16 x float> %v +} + +declare <2 x double> @llvm.vp.merge.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32) + +define <2 x double> @vpmerge_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %va, <2 x double> %vb, i32 %evl) + ret <2 x double> %v +} + +define <2 x double> @vpmerge_vf_v2f64(double %a, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v2f64: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %a, i32 0 + %va = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %v = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %va, <2 x double> %vb, i32 %evl) + ret <2 x double> %v +} + +declare <4 x double> @llvm.vp.merge.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32) + +define <4 x double> @vpmerge_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %va, <4 x double> %vb, i32 %evl) + ret <4 x double> %v +} + +define <4 x double> @vpmerge_vf_v4f64(double %a, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %a, i32 0 + %va = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %v = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %va, <4 x double> %vb, i32 %evl) + ret <4 x double> %v +} + +declare <8 x double> @llvm.vp.merge.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32) + +define <8 x double> @vpmerge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %va, <8 x double> %vb, i32 %evl) + ret <8 x double> %v +} 
+ +define <8 x double> @vpmerge_vf_v8f64(double %a, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %a, i32 0 + %va = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %v = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %va, <8 x double> %vb, i32 %evl) + ret <8 x double> %v +} + +declare <16 x double> @llvm.vp.merge.v16f64(<16 x i1>, <16 x double>, <16 x double>, i32) + +define <16 x double> @vpmerge_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call <16 x double> @llvm.vp.merge.v16f64(<16 x i1> %m, <16 x double> %va, <16 x double> %vb, i32 %evl) + ret <16 x double> %v +} + +define <16 x double> @vpmerge_vf_v16f64(double %a, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v16f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x double> poison, double %a, i32 0 + %va = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer + %v = call <16 x double> @llvm.vp.merge.v16f64(<16 x i1> %m, <16 x double> %va, <16 x double> %vb, i32 %evl) + ret <16 x double> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll new file mode 100644 index 0000000..46ebd0a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -0,0 +1,1280 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc 
-mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32) + +define <vscale x 1 x i8> @vpmerge_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vpmerge_vx_nxv1i8(i8 %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %a, i32 0 + %va = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vpmerge_vi_nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 2, i32 0 + %va = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, i32 %evl) + ret <vscale x 1 x i8> %v +} + +declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32) + +define <vscale x 2 x i8> @vpmerge_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vpmerge_vx_nxv2i8(i8 %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu +;
CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 %a, i32 0 + %va = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vpmerge_vi_nxv2i8(<vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 2, i32 0 + %va = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, i32 %evl) + ret <vscale x 2 x i8> %v +} + +declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32) + +define <vscale x 4 x i8> @vpmerge_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vpmerge_vx_nxv4i8(i8 %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 %a, i32 0 + %va = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vpmerge_vi_nxv4i8(<vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 2, i32 0 + %va = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, i32 %evl) + ret <vscale x 4 x i8> %v +} + +declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32) + +define <vscale x 8 x i8> @vpmerge_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0,
e8, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vpmerge_vx_nxv8i8(i8 %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 %a, i32 0 + %va = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vpmerge_vi_nxv8i8(<vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 2, i32 0 + %va = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, i32 %evl) + ret <vscale x 8 x i8> %v +} + +declare <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32) + +define <vscale x 16 x i8> @vpmerge_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vpmerge_vx_nxv16i8(i8 %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 %a, i32 0 + %va = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vpmerge_vi_nxv16i8(<vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head =
insertelement <vscale x 16 x i8> poison, i8 2, i32 0 + %va = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, i32 %evl) + ret <vscale x 16 x i8> %v +} + +declare <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1>, <vscale x 32 x i8>, <vscale x 32 x i8>, i32) + +define <vscale x 32 x i8> @vpmerge_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vpmerge_vx_nxv32i8(i8 %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 %a, i32 0 + %va = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vpmerge_vi_nxv32i8(<vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 2, i32 0 + %va = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, i32 %evl) + ret <vscale x 32 x i8> %v +} + +declare <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1>, <vscale x 64 x i8>, <vscale x 64 x i8>, i32) + +define <vscale x 64 x i8> @vpmerge_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vpmerge_vx_nxv64i8(i8 %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 %a,
i32 0 + %va = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vpmerge_vi_nxv64i8(<vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 2, i32 0 + %va = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, i32 %evl) + ret <vscale x 64 x i8> %v +} + +declare <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32) + +define <vscale x 1 x i16> @vpmerge_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vpmerge_vx_nxv1i16(i16 %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 %a, i32 0 + %va = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vpmerge_vi_nxv1i16(<vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 2, i32 0 + %va = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, i32 %evl) + ret <vscale x 1 x i16> %v +} + +declare <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32) + +define <vscale x 2 x i16> @vpmerge_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT:
vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv2i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv2i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4i16(, , , i32) + +define @vpmerge_vv_nxv4i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv4i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv4i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector 
%elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8i16(, , , i32) + +define @vpmerge_vv_nxv8i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv8i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv8i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv16i16(, , , i32) + +define @vpmerge_vv_nxv16i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv16i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv16i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, 
poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv16i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv32i16(, , , i32) + +define @vpmerge_vv_nxv32i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv32i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv32i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv32i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv32i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv32i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1i32(, , , i32) + +define @vpmerge_vv_nxv1i32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; 
CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv1i32(i32 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv1i32( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2i32(, , , i32) + +define @vpmerge_vv_nxv2i32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv2i32(i32 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv2i32( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 2, i32 0 + %va = shufflevector %elt.head, poison, 
zeroinitializer + %v = call @llvm.vp.merge.nxv2i32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4i32(, , , i32) + +define @vpmerge_vv_nxv4i32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv4i32(i32 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv4i32( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8i32(, , , i32) + +define @vpmerge_vv_nxv8i32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv8i32(i32 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v 
= call @llvm.vp.merge.nxv8i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv8i32( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv16i32(, , , i32) + +define @vpmerge_vv_nxv16i32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv16i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv16i32(i32 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv16i32( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1i64(, , , i32) + +define @vpmerge_vv_nxv1i64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call 
@llvm.vp.merge.nxv1i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv1i64(i64 %a, %vb, %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v9, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv1i64( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i64 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2i64(, , , i32) + +define @vpmerge_vv_nxv2i64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv2i64(i64 %a, %vb, %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; 
RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v10, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv2i64( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i64 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4i64(, , , i32) + +define @vpmerge_vv_nxv4i64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv4i64(i64 %a, %vb, %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v12, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %a, i32 
0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv4i64( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i64 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8i64(, , , i32) + +define @vpmerge_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv8i64(i64 %a, %vb, %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv8i64( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement 
poison, i64 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1f16(, , , i32) + +define @vpmerge_vv_nxv1f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv1f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2f16(, , , i32) + +define @vpmerge_vv_nxv2f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv2f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4f16(, , , i32) + +define @vpmerge_vv_nxv4f16( %va, %vb, %m, i32 zeroext 
%evl) { +; CHECK-LABEL: vpmerge_vv_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv4f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8f16(, , , i32) + +define @vpmerge_vv_nxv8f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv8f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vfmv.v.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv16f16(, , , i32) + +define @vpmerge_vv_nxv16f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv16f16( %m, %va, 
%vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv16f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vfmv.v.f v12, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv32f16(, , , i32) + +define @vpmerge_vv_nxv32f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv32f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv32f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vfmv.v.f v16, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv32f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1f32(, , , i32) + +define @vpmerge_vv_nxv1f32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1f32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv1f32(float %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; 
CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1f32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2f32(, , , i32) + +define @vpmerge_vv_nxv2f32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2f32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv2f32(float %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2f32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4f32(, , , i32) + +define @vpmerge_vv_nxv4f32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4f32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv4f32(float %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vfmv.v.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4f32( 
%m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8f32(, , , i32) + +define @vpmerge_vv_nxv8f32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8f32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv8f32(float %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vfmv.v.f v12, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8f32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv16f32(, , , i32) + +define @vpmerge_vv_nxv16f32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv16f32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv16f32(float %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; CHECK-NEXT: vfmv.v.f v16, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16f32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1f64(, , , i32) + +define @vpmerge_vv_nxv1f64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, 
m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1f64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv1f64(double %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1f64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2f64(, , , i32) + +define @vpmerge_vv_nxv2f64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2f64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv2f64(double %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; CHECK-NEXT: vfmv.v.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2f64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4f64(, , , i32) + +define @vpmerge_vv_nxv4f64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4f64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv4f64(double %a, %vb, %m, i32 zeroext %evl) { +; 
CHECK-LABEL: vpmerge_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; CHECK-NEXT: vfmv.v.f v12, fa0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4f64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8f64(, , , i32) + +define @vpmerge_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8f64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv8f64(double %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; CHECK-NEXT: vfmv.v.f v16, fa0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8f64( %m, %va, %vb, i32 %evl) + ret %v +} -- 2.7.4