From 95bf5ac8a827f55e70cc54e783b12b83ea0f56fd Mon Sep 17 00:00:00 2001
From: Simon Moll
Date: Mon, 17 Jan 2022 14:56:12 +0100
Subject: [PATCH] [VE] select|vp.merge|vp.select v256 isel and tests

Use the `VMRG` instruction for all three operations for now.
`vp_select` will be used in passthru patterns.

Reviewed By: kaz7

Differential Revision: https://reviews.llvm.org/D117206
---
 llvm/lib/Target/VE/VEISelLowering.cpp     |   7 +-
 llvm/lib/Target/VE/VVPInstrInfo.td        |  11 +++
 llvm/lib/Target/VE/VVPInstrPatternsVec.td |  32 +++++++
 llvm/lib/Target/VE/VVPNodes.def           |   5 ++
 llvm/test/CodeGen/VE/Vector/vec_select.ll | 135 ++++++++++++++++++++++++++++
 llvm/test/CodeGen/VE/Vector/vp_merge.ll   | 143 ++++++++++++++++++++++++++++++
 llvm/test/CodeGen/VE/Vector/vp_select.ll  | 143 ++++++++++++++++++++++++++++++
 7 files changed, 475 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/VE/Vector/vec_select.ll
 create mode 100644 llvm/test/CodeGen/VE/Vector/vp_merge.ll
 create mode 100644 llvm/test/CodeGen/VE/Vector/vp_select.ll
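
[Editorial note] For reference, the three operations this patch routes through `VMRG` look as follows in IR. This is an illustrative sketch mirroring the tests added below; the value names (%m, %t, %f, %evl, %pivot) are placeholders:

    %r0 = select <256 x i1> %m, <256 x i32> %t, <256 x i32> %f
    %r1 = call <256 x i32> @llvm.vp.select.v256i32(<256 x i1> %m, <256 x i32> %t, <256 x i32> %f, i32 %evl)
    %r2 = call <256 x i32> @llvm.vp.merge.v256i32(<256 x i1> %m, <256 x i32> %t, <256 x i32> %f, i32 %pivot)
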
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 4cdc895..601bd5b 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -1720,7 +1720,7 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::EXTRACT_VECTOR_ELT:
     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
 
-#define ADD_BINARY_VVP_OP(VVP_NAME, VP_NAME, ISD_NAME) case ISD::ISD_NAME:
+#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
 #include "VVPNodes.def"
     return lowerToVVP(Op, DAG);
   }
@@ -2729,6 +2729,11 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
     assert(LegalVecVT.isSimple());
     return DAG.getNode(VVPOpcode, DL, LegalVecVT, Op->getOperand(0),
                        Op->getOperand(1), Mask, AVL);
+  } else if (VVPOpcode == VEISD::VVP_SELECT) {
+    auto Mask = Op->getOperand(0);
+    auto OnTrue = Op->getOperand(1);
+    auto OnFalse = Op->getOperand(2);
+    return DAG.getNode(VVPOpcode, DL, LegalVecVT, OnTrue, OnFalse, Mask, AVL);
   }
   llvm_unreachable("lowerToVVP called for unexpected SDNode.");
 }
diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td
index 99566e9..ef9c238 100644
--- a/llvm/lib/Target/VE/VVPInstrInfo.td
+++ b/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -39,6 +39,15 @@ def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc.
   IsVLVT<4>
 ]>;
 
+// Select(OnTrue, OnFalse, SelMask, vl)
+def SDTSelectVVP : SDTypeProfile<1, 4, [ // vp_select, vp_merge
+  SDTCisVec<0>,
+  SDTCisSameNumEltsAs<0, 3>,
+  SDTCisSameAs<0, 1>,
+  SDTCisSameAs<1, 2>,
+  IsVLVT<4>
+]>;
+
 // Binary operator commutative pattern.
 class vvp_commutative<SDNode RootOp> :
   PatFrags<
@@ -79,3 +88,5 @@ def c_vvp_fmul : vvp_commutative<vvp_fmul>;
 
 def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>;
 // } Binary Operators
+
+def vvp_select : SDNode<"VEISD::VVP_SELECT", SDTSelectVVP>;
diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
index 8d5d9d1..cb21001 100644
--- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -191,3 +191,35 @@ defm : Binary_rv_vv_ShortLong;
+
+multiclass Merge_mvv<
+    SDPatternOperator OpNode,
+    ValueType DataVT, ValueType MaskVT,
+    string OpBaseName> {
+  // Masked.
+  def : Pat<(OpNode
+                DataVT:$vtrue, DataVT:$vfalse,
+                MaskVT:$vm,
+                i32:$avl),
+            (!cast<Instruction>(OpBaseName#"vvml_v")
+                $vfalse, $vtrue, $vm, $avl, $vfalse)>;
+}
+
+multiclass Merge_mvv_ShortLong<
+    SDPatternOperator OpNode,
+    ValueType LongDataVT, ValueType ShortDataVT,
+    string OpBaseName> {
+  defm : Merge_mvv<OpNode,
+                   LongDataVT, v256i1,
+                   OpBaseName>;
+  defm : Merge_mvv<OpNode,
+                   ShortDataVT, v256i1,
+                   OpBaseName>;
+}
+
+defm : Merge_mvv_ShortLong<vvp_select,
+                           v256f64,
+                           v256f32, "VMRG">;
+defm : Merge_mvv_ShortLong<vvp_select,
+                           v256i64,
+                           v256i32, "VMRG">;
diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def
index 8a9231f..8000f84 100644
--- a/llvm/lib/Target/VE/VVPNodes.def
+++ b/llvm/lib/Target/VE/VVPNodes.def
@@ -59,6 +59,11 @@ ADD_BINARY_VVP_OP_COMPACT(FSUB)
 ADD_BINARY_VVP_OP_COMPACT(FMUL)
 ADD_BINARY_VVP_OP_COMPACT(FDIV)
 
+// Shuffles.
+ADD_VVP_OP(VVP_SELECT,VSELECT)
+HANDLE_VP_TO_VVP(VP_SELECT, VVP_SELECT)
+HANDLE_VP_TO_VVP(VP_MERGE, VVP_SELECT)
+
 #undef ADD_BINARY_VVP_OP
 #undef ADD_BINARY_VVP_OP_COMPACT
 #undef ADD_VVP_OP
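
[Editorial note] Mapping both `VP_SELECT` and `VP_MERGE` onto the single `VVP_SELECT` node is sound given the per-lane semantics of the two intrinsics (a paraphrase of the LangRef, not text from the patch):

    ; vp.merge:  r[i] = (m[i] && i < %pivot) ? t[i] : f[i]; lanes i >= %pivot yield f[i]
    ; vp.select: r[i] = m[i] ? t[i] : f[i] for i < %evl; lanes i >= %evl are undefined
    %r = call <256 x i64> @llvm.vp.merge.v256i64(<256 x i1> %m, <256 x i64> %t, <256 x i64> %f, i32 %pivot)

As far as the `Merge_mvv` pattern above reads, `$vfalse` is passed a second time as the trailing operand of the `_v` instruction variant (the passthru), so lanes at and beyond the AVL keep the on-false value. That is exactly what `vp.merge` requires and a valid, conservative choice for `vp.select`, whose trailing lanes are unspecified.
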
diff --git a/llvm/test/CodeGen/VE/Vector/vec_select.ll b/llvm/test/CodeGen/VE/Vector/vec_select.ll
new file mode 100644
index 0000000..8ccbff7
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vec_select.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x i32> @llvm.vec.select.v256i32(<256 x i1>, <256 x i32>, <256 x i32>, i32)
+
+define fastcc <256 x i32> @test_vec_select_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256i32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = select <256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1
+  ret <256 x i32> %r0
+}
+
+define fastcc <256 x i32> @test_vec_select_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256i32_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vbrd %v1, %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i32> undef, i32 %s1, i32 0
+  %i1 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %r0 = select <256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1
+  ret <256 x i32> %r0
+}
+
+declare <256 x float> @llvm.vec.select.v256f32(<256 x i1>, <256 x float>, <256 x float>, i32)
+
+define fastcc <256 x float> @test_vec_select_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256f32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = select <256 x i1> %m, <256 x float> %i0, <256 x float> %i1
+  ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vec_select_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256f32_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vbrd %v1, %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x float> undef, float %s1, i32 0
+  %i1 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+  %r0 = select <256 x i1> %m, <256 x float> %i0, <256 x float> %i1
+  ret <256 x float> %r0
+}
+
+declare <256 x double> @llvm.vec.select.v256f64(<256 x i1>, <256 x double>, <256 x double>, i32)
+
+define fastcc <256 x double> @test_vec_select_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256f64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = select <256 x i1> %m, <256 x double> %i0, <256 x double> %i1
+  ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vec_select_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256f64_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vbrd %v1, %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x double> undef, double %s1, i32 0
+  %i1 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+  %r0 = select <256 x i1> %m, <256 x double> %i0, <256 x double> %i1
+  ret <256 x double> %r0
+}
+
+declare <256 x i64> @llvm.vec.select.v256i64(<256 x i1>, <256 x i64>, <256 x i64>, i32)
+
+define fastcc <256 x i64> @test_vec_select_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256i64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = select <256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1
+  ret <256 x i64> %r0
+}
+
+define fastcc <256 x i64> @test_vec_select_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256i64_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vbrd %v1, %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i64> undef, i64 %s1, i32 0
+  %i1 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %r0 = select <256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1
+  ret <256 x i64> %r0
+}
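
[Editorial note] Note how the vector length is set up: plain `select` carries no length operand, so it is lowered with the full vector length of 256 (`lea %s0, 256` followed by `lvl %s0` in the checks above), whereas the VP tests below clamp the `%pivot` argument and feed it straight into `lvl`. A contrasting pair, illustrative only:

    %full = select <256 x i1> %m, <256 x i64> %t, <256 x i64> %f   ; lowered with AVL = 256
    %part = call <256 x i64> @llvm.vp.merge.v256i64(<256 x i1> %m, <256 x i64> %t, <256 x i64> %f, i32 %pivot)   ; lowered with AVL = %pivot
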
diff --git a/llvm/test/CodeGen/VE/Vector/vp_merge.ll b/llvm/test/CodeGen/VE/Vector/vp_merge.ll
new file mode 100644
index 0000000..fdd3809
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vp_merge.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x i32> @llvm.vp.merge.v256i32(<256 x i1>, <256 x i32>, <256 x i32>, i32)
+
+define fastcc <256 x i32> @test_vp_merge_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256i32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i32> @llvm.vp.merge.v256i32(<256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1, i32 %pivot)
+  ret <256 x i32> %r0
+}
+
+define fastcc <256 x i32> @test_vp_merge_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256i32_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea %s2, 256
+; CHECK-NEXT:    lvl %s2
+; CHECK-NEXT:    vbrd %v1, %s0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i32> undef, i32 %s1, i32 0
+  %i1 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i32> @llvm.vp.merge.v256i32(<256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1, i32 %pivot)
+  ret <256 x i32> %r0
+}
+
+declare <256 x float> @llvm.vp.merge.v256f32(<256 x i1>, <256 x float>, <256 x float>, i32)
+
+define fastcc <256 x float> @test_vp_merge_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256f32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x float> @llvm.vp.merge.v256f32(<256 x i1> %m, <256 x float> %i0, <256 x float> %i1, i32 %pivot)
+  ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_merge_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256f32_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea %s2, 256
+; CHECK-NEXT:    lvl %s2
+; CHECK-NEXT:    vbrd %v1, %s0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x float> undef, float %s1, i32 0
+  %i1 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x float> @llvm.vp.merge.v256f32(<256 x i1> %m, <256 x float> %i0, <256 x float> %i1, i32 %pivot)
+  ret <256 x float> %r0
+}
+
+declare <256 x double> @llvm.vp.merge.v256f64(<256 x i1>, <256 x double>, <256 x double>, i32)
+
+define fastcc <256 x double> @test_vp_merge_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256f64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x double> @llvm.vp.merge.v256f64(<256 x i1> %m, <256 x double> %i0, <256 x double> %i1, i32 %pivot)
+  ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_merge_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256f64_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea %s2, 256
+; CHECK-NEXT:    lvl %s2
+; CHECK-NEXT:    vbrd %v1, %s0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x double> undef, double %s1, i32 0
+  %i1 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x double> @llvm.vp.merge.v256f64(<256 x i1> %m, <256 x double> %i0, <256 x double> %i1, i32 %pivot)
+  ret <256 x double> %r0
+}
+
+declare <256 x i64> @llvm.vp.merge.v256i64(<256 x i1>, <256 x i64>, <256 x i64>, i32)
+
+define fastcc <256 x i64> @test_vp_merge_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256i64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.merge.v256i64(<256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1, i32 %pivot)
+  ret <256 x i64> %r0
+}
+
+define fastcc <256 x i64> @test_vp_merge_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256i64_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea %s2, 256
+; CHECK-NEXT:    lvl %s2
+; CHECK-NEXT:    vbrd %v1, %s0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i64> undef, i64 %s1, i32 0
+  %i1 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i64> @llvm.vp.merge.v256i64(<256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1, i32 %pivot)
+  ret <256 x i64> %r0
+}
diff --git a/llvm/test/CodeGen/VE/Vector/vp_select.ll b/llvm/test/CodeGen/VE/Vector/vp_select.ll
new file mode 100644
index 0000000..f716efa
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vp_select.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x i32> @llvm.vp.select.v256i32(<256 x i1>, <256 x i32>, <256 x i32>, i32)
+
+define fastcc <256 x i32> @test_vp_select_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256i32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i32> @llvm.vp.select.v256i32(<256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1, i32 %pivot)
+  ret <256 x i32> %r0
+}
+
+define fastcc <256 x i32> @test_vp_select_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256i32_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea %s2, 256
+; CHECK-NEXT:    lvl %s2
+; CHECK-NEXT:    vbrd %v1, %s0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i32> undef, i32 %s1, i32 0
+  %i1 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i32> @llvm.vp.select.v256i32(<256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1, i32 %pivot)
+  ret <256 x i32> %r0
+}
+
+declare <256 x float> @llvm.vp.select.v256f32(<256 x i1>, <256 x float>, <256 x float>, i32)
+
+define fastcc <256 x float> @test_vp_select_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256f32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x float> @llvm.vp.select.v256f32(<256 x i1> %m, <256 x float> %i0, <256 x float> %i1, i32 %pivot)
+  ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_select_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256f32_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea %s2, 256
+; CHECK-NEXT:    lvl %s2
+; CHECK-NEXT:    vbrd %v1, %s0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x float> undef, float %s1, i32 0
+  %i1 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x float> @llvm.vp.select.v256f32(<256 x i1> %m, <256 x float> %i0, <256 x float> %i1, i32 %pivot)
+  ret <256 x float> %r0
+}
+
+declare <256 x double> @llvm.vp.select.v256f64(<256 x i1>, <256 x double>, <256 x double>, i32)
+
+define fastcc <256 x double> @test_vp_select_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256f64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x double> @llvm.vp.select.v256f64(<256 x i1> %m, <256 x double> %i0, <256 x double> %i1, i32 %pivot)
+  ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_select_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256f64_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea %s2, 256
+; CHECK-NEXT:    lvl %s2
+; CHECK-NEXT:    vbrd %v1, %s0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x double> undef, double %s1, i32 0
+  %i1 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x double> @llvm.vp.select.v256f64(<256 x i1> %m, <256 x double> %i0, <256 x double> %i1, i32 %pivot)
+  ret <256 x double> %r0
+}
+
+declare <256 x i64> @llvm.vp.select.v256i64(<256 x i1>, <256 x i64>, <256 x i64>, i32)
+
+define fastcc <256 x i64> @test_vp_select_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256i64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.select.v256i64(<256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1, i32 %pivot)
+  ret <256 x i64> %r0
+}
+
+define fastcc <256 x i64> @test_vp_select_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256i64_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea %s2, 256
+; CHECK-NEXT:    lvl %s2
+; CHECK-NEXT:    vbrd %v1, %s0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i64> undef, i64 %s1, i32 0
+  %i1 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i64> @llvm.vp.select.v256i64(<256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1, i32 %pivot)
+  ret <256 x i64> %r0
+}
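
[Editorial note] The commit message's remark that `vp_select` will be used in passthru patterns refers to idioms like the following hypothetical one (not part of this patch), where a merge following a masked VP operation can eventually be folded into that instruction's own passthru operand:

    %x = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %a, <256 x double> %b, <256 x i1> %m, i32 %evl)
    %r = call <256 x double> @llvm.vp.merge.v256f64(<256 x i1> %m, <256 x double> %x, <256 x double> %p, i32 %evl)
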
-- 
2.7.4