From: Simon Moll Date: Tue, 21 Dec 2021 08:15:23 +0000 (+0100) Subject: [VE] FADD,FSUB,FMUL,FDIV v256f32|f64 isel and tests X-Git-Tag: upstream/15.0.7~22505 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b2cea573c9a175688a68eec84dbb37864933f60f;p=platform%2Fupstream%2Fllvm.git [VE] FADD,FSUB,FMUL,FDIV v256f32|f64 isel and tests Depends on D115940 for the `Binary_rv_vr_vv` pattern class op isel fragment used for divisions. Reviewed By: kaz7 Differential Revision: https://reviews.llvm.org/D116035 --- diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td index a2d647e..99566e9 100644 --- a/llvm/lib/Target/VE/VVPInstrInfo.td +++ b/llvm/lib/Target/VE/VVPInstrInfo.td @@ -29,6 +29,16 @@ def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc. IsVLVT<4> ]>; +// BinaryFPOp(x,y,mask,vl) +def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc. + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisFP<0>, + SDTCisInt<3>, + SDTCisSameNumEltsAs<0, 3>, + IsVLVT<4> +]>; + // Binary operator commutative pattern. 
class vvp_commutative<SDNode RootOp> : PatFrags< @@ -61,4 +71,11 @@ def vvp_srl : SDNode<"VEISD::VVP_SRL", SDTIntBinOpVVP>; def vvp_sra : SDNode<"VEISD::VVP_SRA", SDTIntBinOpVVP>; def vvp_shl : SDNode<"VEISD::VVP_SHL", SDTIntBinOpVVP>; +def vvp_fadd : SDNode<"VEISD::VVP_FADD", SDTFPBinOpVVP>; +def c_vvp_fadd : vvp_commutative<vvp_fadd>; +def vvp_fsub : SDNode<"VEISD::VVP_FSUB", SDTFPBinOpVVP>; +def vvp_fmul : SDNode<"VEISD::VVP_FMUL", SDTFPBinOpVVP>; +def c_vvp_fmul : vvp_commutative<vvp_fmul>; +def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>; + // } Binary Operators diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td index 3788e49..8d5d9d1 100644 --- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td +++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td @@ -178,3 +178,16 @@ defm : Binary_vr_vv_ShortLong<vvp_sra, i64, v256i64, "VSRAL", i32, v256i32, "PVSRALO">; + +defm : Binary_rv_vv_ShortLong<c_vvp_fadd, f64, v256f64, "VFADDD", f32, v256f32, "PVFADDUP">; +defm : Binary_rv_vv_ShortLong<vvp_fsub, f64, v256f64, "VFSUBD", f32, v256f32, "PVFSUBUP">; +defm : Binary_rv_vv_ShortLong<c_vvp_fmul, f64, v256f64, "VFMULD", f32, v256f32, "PVFMULUP">; +defm : Binary_rv_vr_vv_ShortLong<vvp_fdiv, f64, v256f64, "VFDIVD", f32, v256f32, "VFDIVS">; diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def index 97dcb84..8a9231f 100644 --- a/llvm/lib/Target/VE/VVPNodes.def +++ b/llvm/lib/Target/VE/VVPNodes.def @@ -53,6 +53,12 @@ ADD_BINARY_VVP_OP_COMPACT(AND) ADD_BINARY_VVP_OP_COMPACT(OR) ADD_BINARY_VVP_OP_COMPACT(XOR) +// FP arithmetic. 
+ADD_BINARY_VVP_OP_COMPACT(FADD) +ADD_BINARY_VVP_OP_COMPACT(FSUB) +ADD_BINARY_VVP_OP_COMPACT(FMUL) +ADD_BINARY_VVP_OP_COMPACT(FDIV) + #undef ADD_BINARY_VVP_OP #undef ADD_BINARY_VVP_OP_COMPACT #undef ADD_VVP_OP diff --git a/llvm/test/CodeGen/VE/Vector/vp_fadd.ll b/llvm/test/CodeGen/VE/Vector/vp_fadd.ll new file mode 100644 index 0000000..804235f --- /dev/null +++ b/llvm/test/CodeGen/VE/Vector/vp_fadd.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s + +declare <256 x float> @llvm.vp.fadd.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32) + +define fastcc <256 x float> @test_vp_fadd_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fadd_v256f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvfadd.up %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fadd_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fadd_v256f32_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: pvfadd.up %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x float> undef, float %s0, i32 0 + %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fadd_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fadd_v256f32_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: pvfadd.up %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + 
%yins = insertelement <256 x float> undef, float %s1, i32 0 + %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + + +declare <256 x double> @llvm.vp.fadd.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32) + +define fastcc <256 x double> @test_vp_fadd_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fadd_v256f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vfadd.d %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fadd_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fadd_v256f64_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfadd.d %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x double> undef, double %s0, i32 0 + %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fadd_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fadd_v256f64_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfadd.d %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x double> undef, double %s1, i32 0 + %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, 
<256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} diff --git a/llvm/test/CodeGen/VE/Vector/vp_fdiv.ll b/llvm/test/CodeGen/VE/Vector/vp_fdiv.ll new file mode 100644 index 0000000..669e1f9 --- /dev/null +++ b/llvm/test/CodeGen/VE/Vector/vp_fdiv.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s + +declare <256 x float> @llvm.vp.fdiv.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32) + +define fastcc <256 x float> @test_vp_fdiv_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fdiv_v256f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vfdiv.s %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fdiv_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fdiv_v256f32_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfdiv.s %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x float> undef, float %s0, i32 0 + %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fdiv_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fdiv_v256f32_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfdiv.s %v0, %v0, %s0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x float> undef, float %s1, i32 0 + %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call 
<256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + + +declare <256 x double> @llvm.vp.fdiv.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32) + +define fastcc <256 x double> @test_vp_fdiv_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fdiv_v256f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vfdiv.d %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fdiv_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fdiv_v256f64_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfdiv.d %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x double> undef, double %s0, i32 0 + %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fdiv_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fdiv_v256f64_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfdiv.d %v0, %v0, %s0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x double> undef, double %s1, i32 0 + %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} diff --git a/llvm/test/CodeGen/VE/Vector/vp_fmul.ll b/llvm/test/CodeGen/VE/Vector/vp_fmul.ll new file mode 100644 
index 0000000..0277e75 --- /dev/null +++ b/llvm/test/CodeGen/VE/Vector/vp_fmul.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s + +declare <256 x float> @llvm.vp.fmul.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32) + +define fastcc <256 x float> @test_vp_fmul_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fmul_v256f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvfmul.up %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fmul_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fmul_v256f32_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: pvfmul.up %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x float> undef, float %s0, i32 0 + %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fmul_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fmul_v256f32_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: pvfmul.up %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x float> undef, float %s1, i32 0 + %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + + +declare <256 x double> 
@llvm.vp.fmul.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32) + +define fastcc <256 x double> @test_vp_fmul_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fmul_v256f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vfmul.d %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fmul_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fmul_v256f64_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfmul.d %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x double> undef, double %s0, i32 0 + %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fmul_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fmul_v256f64_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfmul.d %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x double> undef, double %s1, i32 0 + %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} diff --git a/llvm/test/CodeGen/VE/Vector/vp_fsub.ll b/llvm/test/CodeGen/VE/Vector/vp_fsub.ll new file mode 100644 index 0000000..8f51522 --- /dev/null +++ b/llvm/test/CodeGen/VE/Vector/vp_fsub.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s + +declare <256 x float> @llvm.vp.fsub.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32) + +define fastcc <256 x float> @test_vp_fsub_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fsub_v256f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvfsub.up %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fsub_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fsub_v256f32_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: pvfsub.up %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x float> undef, float %s0, i32 0 + %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fsub_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fsub_v256f32_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: lvl %s2 +; CHECK-NEXT: vbrd %v1, %s0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: pvfsub.up %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x float> undef, float %s1, i32 0 + %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + + +declare <256 x double> @llvm.vp.fsub.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32) + 
+define fastcc <256 x double> @test_vp_fsub_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fsub_v256f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vfsub.d %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fsub_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fsub_v256f64_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfsub.d %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x double> undef, double %s0, i32 0 + %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fsub_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fsub_v256f64_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: lvl %s2 +; CHECK-NEXT: vbrd %v1, %s0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfsub.d %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x double> undef, double %s1, i32 0 + %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +}