IsVLVT<4>
]>;
+// BinaryFPOp(x,y,mask,vl)
+// Type profile for the VVP floating-point binary SDNodes: one FP vector
+// result, two operands of the result's type, an integer mask vector with
+// the same lane count as the result, and a vector-length (VL) operand.
+def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc.
+ SDTCisSameAs<0, 1>, // LHS has the result type.
+ SDTCisSameAs<0, 2>, // RHS has the result type.
+ SDTCisFP<0>, // Result (and thus both operands) is floating point.
+ SDTCisInt<3>, // Mask elements are integer.
+ SDTCisSameNumEltsAs<0, 3>, // Mask has one element per result lane.
+ IsVLVT<4> // Operand 4 is the vector length.
+]>;
+
// Binary operator commutative pattern.
class vvp_commutative<SDNode RootOp> :
PatFrags<
def vvp_sra : SDNode<"VEISD::VVP_SRA", SDTIntBinOpVVP>;
def vvp_shl : SDNode<"VEISD::VVP_SHL", SDTIntBinOpVVP>;
+// VVP floating-point arithmetic nodes. FADD and FMUL additionally get a
+// vvp_commutative PatFrags wrapper so patterns can match either operand
+// order; FSUB and FDIV are not commutative and get no wrapper.
+def vvp_fadd : SDNode<"VEISD::VVP_FADD", SDTFPBinOpVVP>;
+def c_vvp_fadd : vvp_commutative<vvp_fadd>;
+def vvp_fsub : SDNode<"VEISD::VVP_FSUB", SDTFPBinOpVVP>;
+def vvp_fmul : SDNode<"VEISD::VVP_FMUL", SDTFPBinOpVVP>;
+def c_vvp_fmul : vvp_commutative<vvp_fmul>;
+def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>;
+
// } Binary Operators
defm : Binary_vr_vv_ShortLong<vvp_srl,
i64, v256i64, "VSRL",
i32, v256i32, "PVSRLLO">;
+
+// Instruction selection for the VVP FP nodes.
+// FADD/FMUL go through their commutative wrappers, so a splatted scalar on
+// either side can be matched onto the scalar-register-first ("rv")
+// instruction form; the f32 flavors use the packed-upper (PV*UP) opcodes.
+defm : Binary_rv_vv_ShortLong<c_vvp_fadd,
+ f64, v256f64, "VFADDD",
+ f32, v256f32, "PVFADDUP">;
+defm : Binary_rv_vv_ShortLong<c_vvp_fmul,
+ f64, v256f64, "VFMULD",
+ f32, v256f32, "PVFMULUP">;
+// FSUB is not commutative; only the rv and vv forms are instantiated, so a
+// vector-scalar operand order must be lowered via a broadcast instead.
+defm : Binary_rv_vv_ShortLong<vvp_fsub,
+ f64, v256f64, "VFSUBD",
+ f32, v256f32, "PVFSUBUP">;
+// FDIV is not commutative but has a dedicated vector/scalar ("vr") form,
+// hence the rv_vr_vv multiclass; its f32 flavor uses the non-packed VFDIVS.
+defm : Binary_rv_vr_vv_ShortLong<vvp_fdiv,
+ f64, v256f64, "VFDIVD",
+ f32, v256f32, "VFDIVS">;
ADD_BINARY_VVP_OP_COMPACT(OR)
ADD_BINARY_VVP_OP_COMPACT(XOR)
+// FP arithmetic. NOTE(review): assumes ADD_BINARY_VVP_OP_COMPACT derives
+// the VVP_/VP_/ISD opcode names from the short name -- confirm against the
+// macro definition earlier in this file.
+ADD_BINARY_VVP_OP_COMPACT(FADD)
+ADD_BINARY_VVP_OP_COMPACT(FSUB)
+ADD_BINARY_VVP_OP_COMPACT(FMUL)
+ADD_BINARY_VVP_OP_COMPACT(FDIV)
+
#undef ADD_BINARY_VVP_OP
#undef ADD_BINARY_VVP_OP_COMPACT
#undef ADD_VVP_OP
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+; Isel tests for @llvm.vp.fadd on v256f32/v256f64: vector/vector ("vv"),
+; splatted-scalar LHS ("rv"), and splatted-scalar RHS ("vr").  The vr cases
+; still select the scalar-first instruction form because fadd commutes.
+
+declare <256 x float> @llvm.vp.fadd.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
+
+define fastcc <256 x float> @test_vp_fadd_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fadd_v256f32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvfadd.up %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fadd_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fadd_v256f32_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: pvfadd.up %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x float> undef, float %s0, i32 0
+ %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+; The scalar RHS is commuted into the first operand (fadd is commutative).
+define fastcc <256 x float> @test_vp_fadd_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fadd_v256f32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: pvfadd.up %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x float> undef, float %s1, i32 0
+ %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+
+declare <256 x double> @llvm.vp.fadd.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
+
+define fastcc <256 x double> @test_vp_fadd_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fadd_v256f64_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfadd.d %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_fadd_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fadd_v256f64_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfadd.d %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x double> undef, double %s0, i32 0
+ %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+; The scalar RHS is commuted into the first operand (fadd is commutative).
+define fastcc <256 x double> @test_vp_fadd_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fadd_v256f64_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfadd.d %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x double> undef, double %s1, i32 0
+ %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+; Isel tests for @llvm.vp.fdiv on v256f32/v256f64: vector/vector ("vv"),
+; splatted-scalar LHS ("rv"), and splatted-scalar RHS ("vr").  fdiv is not
+; commutative but has a native vector/scalar instruction form, so the vr
+; cases keep the scalar in the second operand.
+
+declare <256 x float> @llvm.vp.fdiv.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
+
+define fastcc <256 x float> @test_vp_fdiv_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fdiv_v256f32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfdiv.s %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fdiv_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fdiv_v256f32_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfdiv.s %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x float> undef, float %s0, i32 0
+ %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+; Uses the vector/scalar ("vr") instruction form directly -- no commute.
+define fastcc <256 x float> @test_vp_fdiv_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fdiv_v256f32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfdiv.s %v0, %v0, %s0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x float> undef, float %s1, i32 0
+ %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+
+declare <256 x double> @llvm.vp.fdiv.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
+
+define fastcc <256 x double> @test_vp_fdiv_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fdiv_v256f64_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfdiv.d %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_fdiv_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fdiv_v256f64_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfdiv.d %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x double> undef, double %s0, i32 0
+ %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+; Uses the vector/scalar ("vr") instruction form directly -- no commute.
+define fastcc <256 x double> @test_vp_fdiv_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fdiv_v256f64_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfdiv.d %v0, %v0, %s0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x double> undef, double %s1, i32 0
+ %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+; Isel tests for @llvm.vp.fmul on v256f32/v256f64: vector/vector ("vv"),
+; splatted-scalar LHS ("rv"), and splatted-scalar RHS ("vr").  The vr cases
+; still select the scalar-first instruction form because fmul commutes.
+
+declare <256 x float> @llvm.vp.fmul.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
+
+define fastcc <256 x float> @test_vp_fmul_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fmul_v256f32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvfmul.up %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fmul_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fmul_v256f32_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: pvfmul.up %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x float> undef, float %s0, i32 0
+ %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+; The scalar RHS is commuted into the first operand (fmul is commutative).
+define fastcc <256 x float> @test_vp_fmul_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fmul_v256f32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: pvfmul.up %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x float> undef, float %s1, i32 0
+ %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+
+declare <256 x double> @llvm.vp.fmul.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
+
+define fastcc <256 x double> @test_vp_fmul_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fmul_v256f64_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfmul.d %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_fmul_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fmul_v256f64_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmul.d %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x double> undef, double %s0, i32 0
+ %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+; The scalar RHS is commuted into the first operand (fmul is commutative).
+define fastcc <256 x double> @test_vp_fmul_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fmul_v256f64_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmul.d %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x double> undef, double %s1, i32 0
+ %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+; Isel tests for @llvm.vp.fsub on v256f32/v256f64: vector/vector ("vv"),
+; splatted-scalar LHS ("rv"), and splatted-scalar RHS ("vr").  fsub is not
+; commutative and has no vector/scalar instruction form, so the vr cases
+; broadcast the scalar into a vector register first.
+
+declare <256 x float> @llvm.vp.fsub.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
+
+define fastcc <256 x float> @test_vp_fsub_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fsub_v256f32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvfsub.up %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fsub_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fsub_v256f32_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: pvfsub.up %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x float> undef, float %s0, i32 0
+ %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+; No vr form and no commute: the scalar is broadcast with vbrd (at full
+; VL 256) before the subtract.
+define fastcc <256 x float> @test_vp_fsub_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fsub_v256f32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: pvfsub.up %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x float> undef, float %s1, i32 0
+ %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+
+declare <256 x double> @llvm.vp.fsub.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
+
+define fastcc <256 x double> @test_vp_fsub_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fsub_v256f64_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfsub.d %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_fsub_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fsub_v256f64_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfsub.d %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x double> undef, double %s0, i32 0
+ %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+; No vr form and no commute: the scalar is broadcast with vbrd (at full
+; VL 256) before the subtract.
+define fastcc <256 x double> @test_vp_fsub_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fsub_v256f64_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfsub.d %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x double> undef, double %s1, i32 0
+ %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}