"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set DPR:$Vd,
(v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
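+// Select VBSLd directly for the llvm.arm.neon.vbsl intrinsic; the pattern on
+// VBSLd above only covers the NEONvbsl node, so the v8i8, v4i16 and v2i32
+// forms of the intrinsic each need their own Pat.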
+def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
+                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
+          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+        Requires<[HasNEON]>;
+def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
+                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
+          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+        Requires<[HasNEON]>;
+def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
+                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
+          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+        Requires<[HasNEON]>;
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
-          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
+          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
+        Requires<[HasNEON]>;
def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     [(set QPR:$Vd,
                           (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
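+// Likewise, select VBSLq for the 128-bit (v16i8, v8i16, v4i32) forms of the
+// llvm.arm.neon.vbsl intrinsic.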
+def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
+                                    (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
+          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+        Requires<[HasNEON]>;
+def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
+                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
+          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+        Requires<[HasNEON]>;
+def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
+                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
+          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+        Requires<[HasNEON]>;
+
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
-          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
+          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
+        Requires<[HasNEON]>;
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
%tmp7 = or <2 x i64> %tmp4, %tmp6
ret <2 x i64> %tmp7
}
+
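+; Calling the llvm.arm.neon.vbsl intrinsic directly, rather than writing the
+; or/and select idiom as above, should also produce a single vbsl instruction.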
+define <8 x i8> @f1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind readnone optsize ssp {
+; CHECK: f1:
+; CHECK: vbsl
+ %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind
+ ret <8 x i8> %vbsl.i
+}
+
+define <4 x i16> @f2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
+; CHECK: f2:
+; CHECK: vbsl
+ %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind
+ ret <4 x i16> %vbsl3.i
+}
+
+define <2 x i32> @f3(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
+; CHECK: f3:
+; CHECK: vbsl
+ %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind
+ ret <2 x i32> %vbsl3.i
+}
+
+define <16 x i8> @g1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind readnone optsize ssp {
+; CHECK: g1:
+; CHECK: vbsl
+ %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind
+ ret <16 x i8> %vbsl.i
+}
+
+define <8 x i16> @g2(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind readnone optsize ssp {
+; CHECK: g2:
+; CHECK: vbsl
+ %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind
+ ret <8 x i16> %vbsl3.i
+}
+
+define <4 x i32> @g3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
+; CHECK: g3:
+; CHECK: vbsl
+ %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind
+ ret <4 x i32> %vbsl3.i
+}
+
+declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone