+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s -o -| FileCheck %s
define <8 x i16> @smull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: smull_v8i8_v8i16:
-; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
define <4 x i32> @smull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: smull_v4i16_v4i32:
-; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
define <2 x i64> @smull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: smull_v2i32_v2i64:
-; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
define <8 x i16> @umull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: umull_v8i8_v8i16:
-; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
define <4 x i32> @umull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: umull_v4i16_v4i32:
-; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
define <2 x i64> @umull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: umull_v2i32_v2i64:
-; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
ret <2 x i64> %tmp5
}
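+; Widening multiply of zero-extended operands whose result is masked back to the
+; source element width; only the low half of each product lane is kept.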
+define <8 x i16> @amull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+; CHECK-LABEL: amull_v8i8_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: ret
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
+ %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = mul <8 x i16> %tmp3, %tmp4
+ %and = and <8 x i16> %tmp5, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ ret <8 x i16> %and
+}
+
+define <4 x i32> @amull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+; CHECK-LABEL: amull_v4i16_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
+ %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = mul <4 x i32> %tmp3, %tmp4
+ %and = and <4 x i32> %tmp5, <i32 65535, i32 65535, i32 65535, i32 65535>
+ ret <4 x i32> %and
+}
+
+define <2 x i64> @amull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+; CHECK-LABEL: amull_v2i32_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-NEXT: fmov x10, d1
+; CHECK-NEXT: fmov x11, d0
+; CHECK-NEXT: mov x8, v1.d[1]
+; CHECK-NEXT: mov x9, v0.d[1]
+; CHECK-NEXT: mul x10, x11, x10
+; CHECK-NEXT: mul x8, x9, x8
+; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
+ %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = mul <2 x i64> %tmp3, %tmp4
+ %and = and <2 x i64> %tmp5, <i64 4294967295, i64 4294967295>
+ ret <2 x i64> %and
+}
+
define <8 x i16> @smlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: smlal_v8i8_v8i16:
-; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: smlal v0.8h, v1.8b, v2.8b
+; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = load <8 x i8>, <8 x i8>* %C
define <4 x i32> @smlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: smlal_v4i16_v4i32:
-; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: smlal v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = load <4 x i16>, <4 x i16>* %C
define <2 x i64> @smlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: smlal_v2i32_v2i64:
-; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: smlal v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = load <2 x i32>, <2 x i32>* %C
define <8 x i16> @umlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: umlal_v8i8_v8i16:
-; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: umlal v0.8h, v1.8b, v2.8b
+; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = load <8 x i8>, <8 x i8>* %C
define <4 x i32> @umlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: umlal_v4i16_v4i32:
-; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: umlal v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = load <4 x i16>, <4 x i16>* %C
define <2 x i64> @umlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: umlal_v2i32_v2i64:
-; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: umlal v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = load <2 x i32>, <2 x i32>* %C
ret <2 x i64> %tmp7
}
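+; Multiply-accumulate form of the masked widening multiply: the accumulated
+; result is masked back to the source element width.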
+define <8 x i16> @amlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+; CHECK-LABEL: amlal_v8i8_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-NEXT: ushll v2.8h, v2.8b, #0
+; CHECK-NEXT: mla v0.8h, v1.8h, v2.8h
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: ret
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
+ %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
+ %tmp6 = mul <8 x i16> %tmp4, %tmp5
+ %tmp7 = add <8 x i16> %tmp1, %tmp6
+ %and = and <8 x i16> %tmp7, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ ret <8 x i16> %and
+}
+
+define <4 x i32> @amlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+; CHECK-LABEL: amlal_v4i16_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x1]
+; CHECK-NEXT: ldr d1, [x2]
+; CHECK-NEXT: ldr q2, [x0]
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: ret
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
+ %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
+ %tmp6 = mul <4 x i32> %tmp4, %tmp5
+ %tmp7 = add <4 x i32> %tmp1, %tmp6
+ %and = and <4 x i32> %tmp7, <i32 65535, i32 65535, i32 65535, i32 65535>
+ ret <4 x i32> %and
+}
+
+define <2 x i64> @amlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+; CHECK-LABEL: amlal_v2i32_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x1]
+; CHECK-NEXT: ldr d1, [x2]
+; CHECK-NEXT: ldr q2, [x0]
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-NEXT: fmov x10, d1
+; CHECK-NEXT: fmov x11, d0
+; CHECK-NEXT: mov x8, v1.d[1]
+; CHECK-NEXT: mov x9, v0.d[1]
+; CHECK-NEXT: mul x10, x11, x10
+; CHECK-NEXT: mul x8, x9, x8
+; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: add v0.2d, v2.2d, v0.2d
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
+ %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
+ %tmp6 = mul <2 x i64> %tmp4, %tmp5
+ %tmp7 = add <2 x i64> %tmp1, %tmp6
+ %and = and <2 x i64> %tmp7, <i64 4294967295, i64 4294967295>
+ ret <2 x i64> %and
+}
+
define <8 x i16> @smlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: smlsl_v8i8_v8i16:
-; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: smlsl v0.8h, v1.8b, v2.8b
+; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = load <8 x i8>, <8 x i8>* %C
define <4 x i32> @smlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: smlsl_v4i16_v4i32:
-; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = load <4 x i16>, <4 x i16>* %C
define <2 x i64> @smlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: smlsl_v2i32_v2i64:
-; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = load <2 x i32>, <2 x i32>* %C
define <8 x i16> @umlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: umlsl_v8i8_v8i16:
-; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: umlsl v0.8h, v1.8b, v2.8b
+; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = load <8 x i8>, <8 x i8>* %C
define <4 x i32> @umlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: umlsl_v4i16_v4i32:
-; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = load <4 x i16>, <4 x i16>* %C
define <2 x i64> @umlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: umlsl_v2i32_v2i64:
-; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = load <2 x i32>, <2 x i32>* %C
ret <2 x i64> %tmp7
}
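+; Multiply-subtract form: the widened product is subtracted from the
+; accumulator and the result is masked back to the source element width.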
+define <8 x i16> @amlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+; CHECK-LABEL: amlsl_v8i8_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-NEXT: ushll v2.8h, v2.8b, #0
+; CHECK-NEXT: mls v0.8h, v1.8h, v2.8h
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: ret
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
+ %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
+ %tmp6 = mul <8 x i16> %tmp4, %tmp5
+ %tmp7 = sub <8 x i16> %tmp1, %tmp6
+ %and = and <8 x i16> %tmp7, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ ret <8 x i16> %and
+}
+
+define <4 x i32> @amlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+; CHECK-LABEL: amlsl_v4i16_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x1]
+; CHECK-NEXT: ldr d1, [x2]
+; CHECK-NEXT: ldr q2, [x0]
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-NEXT: mls v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: ret
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
+ %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
+ %tmp6 = mul <4 x i32> %tmp4, %tmp5
+ %tmp7 = sub <4 x i32> %tmp1, %tmp6
+ %and = and <4 x i32> %tmp7, <i32 65535, i32 65535, i32 65535, i32 65535>
+ ret <4 x i32> %and
+}
+
+define <2 x i64> @amlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+; CHECK-LABEL: amlsl_v2i32_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x1]
+; CHECK-NEXT: ldr d1, [x2]
+; CHECK-NEXT: ldr q2, [x0]
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-NEXT: fmov x10, d1
+; CHECK-NEXT: fmov x11, d0
+; CHECK-NEXT: mov x8, v1.d[1]
+; CHECK-NEXT: mov x9, v0.d[1]
+; CHECK-NEXT: mul x10, x11, x10
+; CHECK-NEXT: mul x8, x9, x8
+; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
+ %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
+ %tmp6 = mul <2 x i64> %tmp4, %tmp5
+ %tmp7 = sub <2 x i64> %tmp1, %tmp6
+ %and = and <2 x i64> %tmp7, <i64 4294967295, i64 4294967295>
+ ret <2 x i64> %and
+}
+
; SMULL/UMULL recognizing BUILD_VECTORs with sign/zero-extended elements.
define <8 x i16> @smull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; CHECK-LABEL: smull_extvec_v8i8_v8i16:
-; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.8b, #244
+; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: ret
%tmp3 = sext <8 x i8> %arg to <8 x i16>
%tmp4 = mul <8 x i16> %tmp3, <i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12>
ret <8 x i16> %tmp4
define <8 x i16> @smull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; Do not use SMULL if the BUILD_VECTOR element values are too big.
; CHECK-LABEL: smull_noextvec_v8i8_v8i16:
-; CHECK: mov
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #64537
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
%tmp3 = sext <8 x i8> %arg to <8 x i16>
%tmp4 = mul <8 x i16> %tmp3, <i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999>
ret <8 x i16> %tmp4
define <4 x i32> @smull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
; CHECK-LABEL: smull_extvec_v4i16_v4i32:
-; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvni v1.4h, #11
+; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: ret
%tmp3 = sext <4 x i16> %arg to <4 x i32>
%tmp4 = mul <4 x i32> %tmp3, <i32 -12, i32 -12, i32 -12, i32 -12>
ret <4 x i32> %tmp4
}
define <2 x i64> @smull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
-; CHECK: smull_extvec_v2i32_v2i64
-; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: smull_extvec_v2i32_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-1234
+; CHECK-NEXT: dup v1.2s, w8
+; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: ret
%tmp3 = sext <2 x i32> %arg to <2 x i64>
%tmp4 = mul <2 x i64> %tmp3, <i64 -1234, i64 -1234>
ret <2 x i64> %tmp4
define <8 x i16> @umull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; CHECK-LABEL: umull_extvec_v8i8_v8i16:
-; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.8b, #12
+; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: ret
%tmp3 = zext <8 x i8> %arg to <8 x i16>
%tmp4 = mul <8 x i16> %tmp3, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
ret <8 x i16> %tmp4
define <8 x i16> @umull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; Do not use UMULL if the BUILD_VECTOR element values are too big.
; CHECK-LABEL: umull_noextvec_v8i8_v8i16:
-; CHECK: mov
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #999
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
%tmp3 = zext <8 x i8> %arg to <8 x i16>
%tmp4 = mul <8 x i16> %tmp3, <i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999>
ret <8 x i16> %tmp4
define <4 x i32> @umull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
; CHECK-LABEL: umull_extvec_v4i16_v4i32:
-; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1234
+; CHECK-NEXT: dup v1.4h, w8
+; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: ret
%tmp3 = zext <4 x i16> %arg to <4 x i32>
%tmp4 = mul <4 x i32> %tmp3, <i32 1234, i32 1234, i32 1234, i32 1234>
ret <4 x i32> %tmp4
define <2 x i64> @umull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
; CHECK-LABEL: umull_extvec_v2i32_v2i64:
-; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1234
+; CHECK-NEXT: dup v1.2s, w8
+; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: ret
%tmp3 = zext <2 x i32> %arg to <2 x i64>
%tmp4 = mul <2 x i64> %tmp3, <i64 1234, i64 1234>
ret <2 x i64> %tmp4
}
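+; Masked widening multiply of a zero-extended vector by a constant splat.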
+define <8 x i16> @amull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
+; CHECK-LABEL: amull_extvec_v8i8_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: movi v1.8h, #12
+; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: ret
+ %tmp3 = zext <8 x i8> %arg to <8 x i16>
+ %tmp4 = mul <8 x i16> %tmp3, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
+ %and = and <8 x i16> %tmp4, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ ret <8 x i16> %and
+}
+
+define <4 x i32> @amull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
+; CHECK-LABEL: amull_extvec_v4i16_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1234
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: dup v1.4s, w8
+; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %tmp3 = zext <4 x i16> %arg to <4 x i32>
+ %tmp4 = mul <4 x i32> %tmp3, <i32 1234, i32 1234, i32 1234, i32 1234>
+ %and = and <4 x i32> %tmp4, <i32 65535, i32 65535, i32 65535, i32 65535>
+ ret <4 x i32> %and
+}
+
+define <2 x i64> @amull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
+; CHECK-LABEL: amull_extvec_v2i32_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: mov w8, #1234
+; CHECK-NEXT: fmov x10, d0
+; CHECK-NEXT: mov x9, v0.d[1]
+; CHECK-NEXT: mul x10, x10, x8
+; CHECK-NEXT: mul x8, x9, x8
+; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %tmp3 = zext <2 x i32> %arg to <2 x i64>
+ %tmp4 = mul <2 x i64> %tmp3, <i64 1234, i64 1234>
+ %and = and <2 x i64> %tmp4, <i64 4294967295, i64 4294967295>
+ ret <2 x i64> %and
+}
+
define i16 @smullWithInconsistentExtensions(<8 x i8> %x, <8 x i8> %y) {
; If one operand is zero-extended and the other sign-extended, neither SMULL
; nor UMULL can be used, so the multiply is done in the wide type.
; CHECK-LABEL: smullWithInconsistentExtensions:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: umov w0, v0.h[0]
+; CHECK-NEXT: ret
%s = sext <8 x i8> %x to <8 x i16>
%z = zext <8 x i8> %y to <8 x i16>
%m = mul <8 x i16> %s, %z
ret i16 %r
}
-define void @distribute(i16* %dst, i8* %src, i32 %mul) nounwind {
-entry:
+define void @distribute(<8 x i16>* %dst, <16 x i8>* %src, i32 %mul) nounwind {
; CHECK-LABEL: distribute:
-; CHECK: umull [[REG1:(v[0-9]+.8h)]], {{v[0-9]+}}.8b, [[REG2:(v[0-9]+.8b)]]
-; CHECK: umlal [[REG1]], {{v[0-9]+}}.8b, [[REG2]]
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr q0, [x1]
+; CHECK-NEXT: dup v1.8b, w2
+; CHECK-NEXT: mov d2, v0.d[1]
+; CHECK-NEXT: umull v2.8h, v2.8b, v1.8b
+; CHECK-NEXT: umlal v2.8h, v0.8b, v1.8b
+; CHECK-NEXT: str q2, [x0]
+; CHECK-NEXT: ret
+entry:
%0 = trunc i32 %mul to i8
%1 = insertelement <8 x i8> undef, i8 %0, i32 0
%2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
- %3 = tail call <16 x i8> @llvm.aarch64.neon.vld1.v16i8(i8* %src, i32 1)
+ %3 = load <16 x i8>, <16 x i8>* %src, align 1
%4 = bitcast <16 x i8> %3 to <2 x double>
%5 = extractelement <2 x double> %4, i32 1
%6 = bitcast double %5 to <8 x i8>
%11 = zext <8 x i8> %10 to <8 x i16>
%12 = add <8 x i16> %7, %11
%13 = mul <8 x i16> %12, %8
- %14 = bitcast i16* %dst to i8*
- tail call void @llvm.aarch64.neon.vst1.v8i16(i8* %14, <8 x i16> %13, i32 2)
+ store <8 x i16> %13, <8 x i16>* %dst, align 2
ret void
}
define <16 x i16> @umull2_i8(<16 x i8> %arg1, <16 x i8> %arg2) {
; CHECK-LABEL: umull2_i8:
-; CHECK-DAG: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-; CHECK-DAG: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0:
+; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b
+; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: ret
%arg1_ext = zext <16 x i8> %arg1 to <16 x i16>
%arg2_ext = zext <16 x i8> %arg2 to <16 x i16>
%mul = mul <16 x i16> %arg1_ext, %arg2_ext
define <16 x i16> @smull2_i8(<16 x i8> %arg1, <16 x i8> %arg2) {
; CHECK-LABEL: smull2_i8:
-; CHECK-DAG: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-; CHECK-DAG: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0:
+; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b
+; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: ret
%arg1_ext = sext <16 x i8> %arg1 to <16 x i16>
%arg2_ext = sext <16 x i8> %arg2 to <16 x i16>
%mul = mul <16 x i16> %arg1_ext, %arg2_ext
define <8 x i32> @umull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) {
; CHECK-LABEL: umull2_i16:
-; CHECK-DAG: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-; CHECK-DAG: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0:
+; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: ret
%arg1_ext = zext <8 x i16> %arg1 to <8 x i32>
%arg2_ext = zext <8 x i16> %arg2 to <8 x i32>
%mul = mul <8 x i32> %arg1_ext, %arg2_ext
define <8 x i32> @smull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) {
; CHECK-LABEL: smull2_i16:
-; CHECK-DAG: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-; CHECK-DAG: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0:
+; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: ret
%arg1_ext = sext <8 x i16> %arg1 to <8 x i32>
%arg2_ext = sext <8 x i16> %arg2 to <8 x i32>
%mul = mul <8 x i32> %arg1_ext, %arg2_ext
define <4 x i64> @umull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
; CHECK-LABEL: umull2_i32:
-; CHECK-DAG: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-; CHECK-DAG: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0:
+; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: ret
%arg1_ext = zext <4 x i32> %arg1 to <4 x i64>
%arg2_ext = zext <4 x i32> %arg2 to <4 x i64>
%mul = mul <4 x i64> %arg1_ext, %arg2_ext
define <4 x i64> @smull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
; CHECK-LABEL: smull2_i32:
-; CHECK-DAG: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-; CHECK-DAG: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0:
+; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: ret
%arg1_ext = sext <4 x i32> %arg1 to <4 x i64>
%arg2_ext = sext <4 x i32> %arg2 to <4 x i64>
%mul = mul <4 x i64> %arg1_ext, %arg2_ext
ret <4 x i64> %mul
}
-declare <16 x i8> @llvm.aarch64.neon.vld1.v16i8(i8*, i32) nounwind readonly
-
-declare void @llvm.aarch64.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
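+; Masked widening multiplies with 128-bit source vectors; codegen splits the
+; work across the low and high halves.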
+define <16 x i16> @amull2_i8(<16 x i8> %arg1, <16 x i8> %arg2) {
+; CHECK-LABEL: amull2_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll2 v2.8h, v0.16b, #0
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: ushll2 v3.8h, v1.16b, #0
+; CHECK-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: mul v1.8h, v2.8h, v3.8h
+; CHECK-NEXT: bic v1.8h, #255, lsl #8
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: ret
+ %arg1_ext = zext <16 x i8> %arg1 to <16 x i16>
+ %arg2_ext = zext <16 x i8> %arg2 to <16 x i16>
+ %mul = mul <16 x i16> %arg1_ext, %arg2_ext
+ %and = and <16 x i16> %mul, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ ret <16 x i16> %and
+}
+
+define <8 x i32> @amull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) {
+; CHECK-LABEL: amull2_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ushll2 v3.4s, v1.8h, #0
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-NEXT: movi v4.2d, #0x00ffff0000ffff
+; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: mul v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: and v1.16b, v1.16b, v4.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v4.16b
+; CHECK-NEXT: ret
+ %arg1_ext = zext <8 x i16> %arg1 to <8 x i32>
+ %arg2_ext = zext <8 x i16> %arg2 to <8 x i32>
+ %mul = mul <8 x i32> %arg1_ext, %arg2_ext
+ %and = and <8 x i32> %mul, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ ret <8 x i32> %and
+}
+
+define <4 x i64> @amull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
+; CHECK-LABEL: amull2_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll2 v2.2d, v0.4s, #0
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ushll2 v3.2d, v1.4s, #0
+; CHECK-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-NEXT: fmov x10, d1
+; CHECK-NEXT: fmov x11, d0
+; CHECK-NEXT: fmov x13, d3
+; CHECK-NEXT: fmov x14, d2
+; CHECK-NEXT: mov x8, v1.d[1]
+; CHECK-NEXT: mov x9, v0.d[1]
+; CHECK-NEXT: mul x10, x11, x10
+; CHECK-NEXT: mov x11, v3.d[1]
+; CHECK-NEXT: mov x12, v2.d[1]
+; CHECK-NEXT: mul x13, x14, x13
+; CHECK-NEXT: mul x8, x9, x8
+; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: mul x9, x12, x11
+; CHECK-NEXT: fmov d1, x13
+; CHECK-NEXT: movi v2.2d, #0x000000ffffffff
+; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: mov v1.d[1], x9
+; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ret
+ %arg1_ext = zext <4 x i32> %arg1 to <4 x i64>
+ %arg2_ext = zext <4 x i32> %arg2 to <4 x i64>
+ %mul = mul <4 x i64> %arg1_ext, %arg2_ext
+ %and = and <4 x i64> %mul, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ ret <4 x i64> %and
+}
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)
-
+declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5
declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
-
declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
-
declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
-
declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
-
declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
-
declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
-
declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
-
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
-
declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>)
-
declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
-
declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
-
declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>)
-
declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>)
-
declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>)
-
declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
-
declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)
-
declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)
-
declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
-
declare <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>)
-
declare <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>)
-
declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>)
-
declare <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>)
-
declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>)
-
declare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>)
define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddl_s8:
-; CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = sext <8 x i8> %a to <8 x i16>
%vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddl_s16:
-; CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = sext <4 x i16> %a to <4 x i32>
%vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddl_s32:
-; CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = sext <2 x i32> %a to <2 x i64>
%vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddl_u8:
-; CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = zext <8 x i8> %a to <8 x i16>
%vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddl_u16:
-; CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = zext <4 x i16> %a to <4 x i32>
%vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddl_u32:
-; CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = zext <2 x i32> %a to <2 x i64>
%vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
ret <2 x i64> %add.i
}
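+; Widening add of zero-extended operands with the result masked back to the
+; source element width.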
+define <8 x i16> @test_vaddl_a8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vaddl_a8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: ret
+entry:
+ %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
+ %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
+ %and = and <8 x i16> %add.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ ret <8 x i16> %and
+}
+
+define <4 x i32> @test_vaddl_a16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vaddl_a16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
+ %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
+ %and = and <4 x i32> %add.i, <i32 65535, i32 65535, i32 65535, i32 65535>
+ ret <4 x i32> %and
+}
+
+define <2 x i64> @test_vaddl_a32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vaddl_a32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
+ %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
+ %and = and <2 x i64> %add.i, <i64 4294967295, i64 4294967295>
+ ret <2 x i64> %and
+}
+
define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddl_high_s8:
-; CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddl2 v0.8h, v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddl_high_s16:
-; CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddl2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddl_high_s32:
-; CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddl2 v0.2d, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddl_high_u8:
-; CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddl2 v0.8h, v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddl_high_u16:
-; CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddl2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddl_high_u32:
-; CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddl2 v0.2d, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
ret <2 x i64> %add.i
}
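+; Same masked widening add, taking the high halves of 128-bit sources.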
+define <8 x i16> @test_vaddl_high_a8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaddl_high_a8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0
+; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
+ %add.i = add <8 x i16> %0, %1
+ %and = and <8 x i16> %add.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ ret <8 x i16> %and
+}
+
+define <4 x i32> @test_vaddl_high_a16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddl_high_a16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
+ %add.i = add <4 x i32> %0, %1
+ %and = and <4 x i32> %add.i, <i32 65535, i32 65535, i32 65535, i32 65535>
+ ret <4 x i32> %and
+}
+
+define <2 x i64> @test_vaddl_high_a32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddl_high_a32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
+ %add.i = add <2 x i64> %0, %1
+ %and = and <2 x i64> %add.i, <i64 4294967295, i64 4294967295>
+ ret <2 x i64> %and
+}
+
define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddw_s8:
-; CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddw v0.8h, v0.8h, v1.8b
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = sext <8 x i8> %b to <8 x i16>
%add.i = add <8 x i16> %vmovl.i.i, %a
define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddw_s16:
-; CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddw v0.4s, v0.4s, v1.4h
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = sext <4 x i16> %b to <4 x i32>
%add.i = add <4 x i32> %vmovl.i.i, %a
define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddw_s32:
-; CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddw v0.2d, v0.2d, v1.2s
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = sext <2 x i32> %b to <2 x i64>
%add.i = add <2 x i64> %vmovl.i.i, %a
define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddw_u8:
-; CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddw v0.8h, v0.8h, v1.8b
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = zext <8 x i8> %b to <8 x i16>
%add.i = add <8 x i16> %vmovl.i.i, %a
define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddw_u16:
-; CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = zext <4 x i16> %b to <4 x i32>
%add.i = add <4 x i32> %vmovl.i.i, %a
define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddw_u32:
-; CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddw v0.2d, v0.2d, v1.2s
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = zext <2 x i32> %b to <2 x i64>
%add.i = add <2 x i64> %vmovl.i.i, %a
ret <2 x i64> %add.i
}
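+; Add of a wide first operand and a zero-extended narrow operand, with the
+; result masked back to the narrow element width.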
+define <8 x i16> @test_vaddw_a8(<8 x i16> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vaddw_a8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: ret
+entry:
+ %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i, %a
+ %and = and <8 x i16> %add.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ ret <8 x i16> %and
+}
+
+define <4 x i32> @test_vaddw_a16(<4 x i32> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vaddw_a16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i, %a
+ %and = and <4 x i32> %add.i, <i32 65535, i32 65535, i32 65535, i32 65535>
+ ret <4 x i32> %and
+}
+
+define <2 x i64> @test_vaddw_a32(<2 x i64> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vaddw_a32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i, %a
+ %and = and <2 x i64> %add.i, <i64 4294967295, i64 4294967295>
+ ret <2 x i64> %and
+}
+
define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddw_high_s8:
-; CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddw2 v0.8h, v0.8h, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddw_high_s16:
-; CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddw2 v0.4s, v0.4s, v1.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddw_high_s32:
-; CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddw2 v0.2d, v0.2d, v1.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddw_high_u8:
-; CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddw2 v0.8h, v0.8h, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddw_high_u16:
-; CHECK: uaddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddw2 v0.4s, v0.4s, v1.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddw_high_u32:
-; CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddw2 v0.2d, v0.2d, v1.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
ret <2 x i64> %add.i
}
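+; Same masked wide-plus-narrow add, taking the high half of the 128-bit second
+; operand.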
+define <8 x i16> @test_vaddw_high_a8(<8 x i16> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaddw_high_a8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0
+; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %add.i = add <8 x i16> %0, %a
+ %and = and <8 x i16> %add.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ ret <8 x i16> %and
+}
+
+define <4 x i32> @test_vaddw_high_a16(<4 x i32> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddw_high_a16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %add.i = add <4 x i32> %0, %a
+ %and = and <4 x i32> %add.i, <i32 65535, i32 65535, i32 65535, i32 65535>
+ ret <4 x i32> %and
+}
+
+define <2 x i64> @test_vaddw_high_a32(<2 x i64> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddw_high_a32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0
+; CHECK-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %add.i = add <2 x i64> %0, %a
+ %and = and <2 x i64> %add.i, <i64 4294967295, i64 4294967295>
+ ret <2 x i64> %and
+}
+
define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubl_s8:
-; CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ssubl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = sext <8 x i8> %a to <8 x i16>
%vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubl_s16:
-; CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ssubl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = sext <4 x i16> %a to <4 x i32>
%vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubl_s32:
-; CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ssubl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = sext <2 x i32> %a to <2 x i64>
%vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubl_u8:
-; CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = zext <8 x i8> %a to <8 x i16>
%vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubl_u16:
-; CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: usubl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = zext <4 x i16> %a to <4 x i32>
%vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubl_u32:
-; CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: usubl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = zext <2 x i32> %a to <2 x i64>
%vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
ret <2 x i64> %sub.i
}
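+; Widening subtract of zero-extended operands with the result masked back to
+; the source element width.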
+define <8 x i16> @test_vsubl_a8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vsubl_a8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: ret
+entry:
+ %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
+ %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
+ %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
+ %and = and <8 x i16> %sub.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ ret <8 x i16> %and
+}
+
+define <4 x i32> @test_vsubl_a16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vsubl_a16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
+ %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
+ %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
+ %and = and <4 x i32> %sub.i, <i32 65535, i32 65535, i32 65535, i32 65535>
+ ret <4 x i32> %and
+}
+
+define <2 x i64> @test_vsubl_a32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vsubl_a32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
+ %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
+ %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
+ %and = and <2 x i64> %sub.i, <i64 4294967295, i64 4294967295>
+ ret <2 x i64> %and
+}
+
define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubl_high_s8:
-; CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ssubl2 v0.8h, v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubl_high_s16:
-; CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ssubl2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubl_high_s32:
-; CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ssubl2 v0.2d, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubl_high_u8:
-; CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: usubl2 v0.8h, v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubl_high_u16:
-; CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: usubl2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubl_high_u32:
-; CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: usubl2 v0.2d, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
ret <2 x i64> %sub.i
}
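+; Same masked widening subtract, taking the high halves of 128-bit sources.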
+define <8 x i16> @test_vsubl_high_a8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsubl_high_a8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0
+; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
+ %sub.i = sub <8 x i16> %0, %1
+ %and = and <8 x i16> %sub.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ ret <8 x i16> %and
+}
+
+define <4 x i32> @test_vsubl_high_a16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubl_high_a16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
+ %sub.i = sub <4 x i32> %0, %1
+ %and = and <4 x i32> %sub.i, <i32 65535, i32 65535, i32 65535, i32 65535>
+ ret <4 x i32> %and
+}
+
+define <2 x i64> @test_vsubl_high_a32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubl_high_a32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0
+; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
+ %sub.i = sub <2 x i64> %0, %1
+ %and = and <2 x i64> %sub.i, <i64 4294967295, i64 4294967295>
+ ret <2 x i64> %and
+}
+
define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubw_s8:
-; CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ssubw v0.8h, v0.8h, v1.8b
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = sext <8 x i8> %b to <8 x i16>
%sub.i = sub <8 x i16> %a, %vmovl.i.i
define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubw_s16:
-; CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ssubw v0.4s, v0.4s, v1.4h
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = sext <4 x i16> %b to <4 x i32>
%sub.i = sub <4 x i32> %a, %vmovl.i.i
define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubw_s32:
-; CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ssubw v0.2d, v0.2d, v1.2s
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = sext <2 x i32> %b to <2 x i64>
%sub.i = sub <2 x i64> %a, %vmovl.i.i
define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubw_u8:
-; CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: usubw v0.8h, v0.8h, v1.8b
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = zext <8 x i8> %b to <8 x i16>
%sub.i = sub <8 x i16> %a, %vmovl.i.i
define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubw_u16:
-; CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: usubw v0.4s, v0.4s, v1.4h
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = zext <4 x i16> %b to <4 x i32>
%sub.i = sub <4 x i32> %a, %vmovl.i.i
define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubw_u32:
-; CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: usubw v0.2d, v0.2d, v1.2s
+; CHECK-NEXT: ret
entry:
%vmovl.i.i = zext <2 x i32> %b to <2 x i64>
%sub.i = sub <2 x i64> %a, %vmovl.i.i
ret <2 x i64> %sub.i
}
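+; Widening-subtract ("vsubw") variants with the result masked back to the
+; narrow element width; the generated CHECK lines show a ushll + sub sequence
+; followed by the mask (bic or movi/and) instead of usubw.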
+define <8 x i16> @test_vsubw_a8(<8 x i16> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vsubw_a8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: ret
+entry:
+ %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
+ %sub.i = sub <8 x i16> %a, %vmovl.i.i
+ %and = and <8 x i16> %sub.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ ret <8 x i16> %and
+}
+
+define <4 x i32> @test_vsubw_a16(<4 x i32> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vsubw_a16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
+ %sub.i = sub <4 x i32> %a, %vmovl.i.i
+ %and = and <4 x i32> %sub.i, <i32 65535, i32 65535, i32 65535, i32 65535>
+ ret <4 x i32> %and
+}
+
+define <2 x i64> @test_vsubw_a32(<2 x i64> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vsubw_a32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
+ %sub.i = sub <2 x i64> %a, %vmovl.i.i
+ %and = and <2 x i64> %sub.i, <i64 4294967295, i64 4294967295>
+ ret <2 x i64> %and
+}
+
define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubw_high_s8:
-; CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ssubw2 v0.8h, v0.8h, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubw_high_s16:
-; CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ssubw2 v0.4s, v0.4s, v1.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubw_high_s32:
-; CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ssubw2 v0.2d, v0.2d, v1.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubw_high_u8:
-; CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: usubw2 v0.8h, v0.8h, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubw_high_u16:
-; CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: usubw2 v0.4s, v0.4s, v1.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubw_high_u32:
-; CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: usubw2 v0.2d, v0.2d, v1.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
ret <2 x i64> %sub.i
}
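+; High-half widening-subtract variants with the result masked back to the
+; narrow element width; the generated CHECK lines show ushll2 + sub plus the
+; mask instead of usubw2.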
+define <8 x i16> @test_vsubw_high_a8(<8 x i16> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsubw_high_a8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0
+; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %sub.i = sub <8 x i16> %a, %0
+ %and = and <8 x i16> %sub.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ ret <8 x i16> %and
+}
+
+define <4 x i32> @test_vsubw_high_a16(<4 x i32> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubw_high_a16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %sub.i = sub <4 x i32> %a, %0
+ %and = and <4 x i32> %sub.i, <i32 65535, i32 65535, i32 65535, i32 65535>
+ ret <4 x i32> %and
+}
+
+define <2 x i64> @test_vsubw_high_a32(<2 x i64> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubw_high_a32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0
+; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %sub.i = sub <2 x i64> %a, %0
+ %and = and <2 x i64> %sub.i, <i64 4294967295, i64 4294967295>
+ ret <2 x i64> %and
+}
+
define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddhn_s16:
-; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <8 x i16> %a, %b
%vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddhn_s32:
-; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <4 x i32> %a, %b
%vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vaddhn_s64:
-; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <2 x i64> %a, %b
%vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddhn_u16:
-; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <8 x i16> %a, %b
%vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddhn_u32:
-; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <4 x i32> %a, %b
%vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vaddhn_u64:
-; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <2 x i64> %a, %b
%vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddhn_high_s16:
-; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: addhn2 v0.16b, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%vaddhn.i.i = add <8 x i16> %a, %b
%vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddhn_high_s32:
-; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: addhn2 v0.8h, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%vaddhn.i.i = add <4 x i32> %a, %b
%vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vaddhn_high_s64:
-; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT: ret
entry:
%vaddhn.i.i = add <2 x i64> %a, %b
%vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddhn_high_u16:
-; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: addhn2 v0.16b, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%vaddhn.i.i = add <8 x i16> %a, %b
%vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddhn_high_u32:
-; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: addhn2 v0.8h, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%vaddhn.i.i = add <4 x i32> %a, %b
%vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vaddhn_high_u64:
-; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT: ret
entry:
%vaddhn.i.i = add <2 x i64> %a, %b
%vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vraddhn_s16:
-; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: raddhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
ret <8 x i8> %vraddhn2.i
define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vraddhn_s32:
-; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: raddhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
ret <4 x i16> %vraddhn2.i
define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vraddhn_s64:
-; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: raddhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
ret <2 x i32> %vraddhn2.i
define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vraddhn_u16:
-; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: raddhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
ret <8 x i8> %vraddhn2.i
define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vraddhn_u32:
-; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: raddhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
ret <4 x i16> %vraddhn2.i
define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vraddhn_u64:
-; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: raddhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
ret <2 x i32> %vraddhn2.i
define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vraddhn_high_s16:
-; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: raddhn2 v0.16b, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
%0 = bitcast <8 x i8> %r to <1 x i64>
define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vraddhn_high_s32:
-; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: raddhn2 v0.8h, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
%0 = bitcast <4 x i16> %r to <1 x i64>
define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vraddhn_high_s64:
-; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: raddhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT: ret
entry:
%vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
%0 = bitcast <2 x i32> %r to <1 x i64>
define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vraddhn_high_u16:
-; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: raddhn2 v0.16b, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
%0 = bitcast <8 x i8> %r to <1 x i64>
define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vraddhn_high_u32:
-; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: raddhn2 v0.8h, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
%0 = bitcast <4 x i16> %r to <1 x i64>
define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vraddhn_high_u64:
-; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: raddhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT: ret
entry:
%vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
%0 = bitcast <2 x i32> %r to <1 x i64>
define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubhn_s16:
-; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <8 x i16> %a, %b
%vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubhn_s32:
-; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <4 x i32> %a, %b
%vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vsubhn_s64:
-; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <2 x i64> %a, %b
%vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubhn_u16:
-; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <8 x i16> %a, %b
%vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubhn_u32:
-; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <4 x i32> %a, %b
%vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vsubhn_u64:
-; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <2 x i64> %a, %b
%vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubhn_high_s16:
-; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: subhn2 v0.16b, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%vsubhn.i.i = sub <8 x i16> %a, %b
%vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubhn_high_s32:
-; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: subhn2 v0.8h, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%vsubhn.i.i = sub <4 x i32> %a, %b
%vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vsubhn_high_s64:
-; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: subhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT: ret
entry:
%vsubhn.i.i = sub <2 x i64> %a, %b
%vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubhn_high_u16:
-; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: subhn2 v0.16b, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%vsubhn.i.i = sub <8 x i16> %a, %b
%vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubhn_high_u32:
-; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: subhn2 v0.8h, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%vsubhn.i.i = sub <4 x i32> %a, %b
%vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vsubhn_high_u64:
-; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: subhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT: ret
entry:
%vsubhn.i.i = sub <2 x i64> %a, %b
%vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrsubhn_s16:
-; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rsubhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
ret <8 x i8> %vrsubhn2.i
define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrsubhn_s32:
-; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rsubhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
ret <4 x i16> %vrsubhn2.i
define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vrsubhn_s64:
-; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rsubhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
ret <2 x i32> %vrsubhn2.i
define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrsubhn_u16:
-; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rsubhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
ret <8 x i8> %vrsubhn2.i
define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrsubhn_u32:
-; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rsubhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
ret <4 x i16> %vrsubhn2.i
define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vrsubhn_u64:
-; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rsubhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
ret <2 x i32> %vrsubhn2.i
define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrsubhn_high_s16:
-; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
%0 = bitcast <8 x i8> %r to <1 x i64>
define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrsubhn_high_s32:
-; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
%0 = bitcast <4 x i16> %r to <1 x i64>
define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vrsubhn_high_s64:
-; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
%0 = bitcast <2 x i32> %r to <1 x i64>
define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrsubhn_high_u16:
-; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
%0 = bitcast <8 x i8> %r to <1 x i64>
define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrsubhn_high_u32:
-; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
%0 = bitcast <4 x i16> %r to <1 x i64>
define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vrsubhn_high_u64:
-; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
%0 = bitcast <2 x i32> %r to <1 x i64>
define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vabdl_s8:
-; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sabdl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: ret
entry:
%vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b)
%vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vabdl_s16:
-; CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sabdl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: ret
entry:
%vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b)
%vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vabdl_s32:
-; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sabdl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: ret
entry:
%vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b)
%vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vabdl_u8:
-; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uabdl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: ret
entry:
%vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b)
%vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vabdl_u16:
-; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uabdl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: ret
entry:
%vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b)
%vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vabdl_u32:
-; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uabdl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: ret
entry:
%vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b)
%vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vabal_s8:
-; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sabal v0.8h, v1.8b, v2.8b
+; CHECK-NEXT: ret
entry:
%vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
%vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vabal_s16:
-; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sabal v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: ret
entry:
%vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
%vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vabal_s32:
-; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sabal v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: ret
entry:
%vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
%vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vabal_u8:
-; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uabal v0.8h, v1.8b, v2.8b
+; CHECK-NEXT: ret
entry:
%vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
%vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vabal_u16:
-; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uabal v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: ret
entry:
%vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
%vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vabal_u32:
-; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uabal v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: ret
entry:
%vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
%vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vabdl_high_s8:
-; CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sabdl2 v0.8h, v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vabdl_high_s16:
-; CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sabdl2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vabdl_high_s32:
-; CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sabdl2 v0.2d, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vabdl_high_u8:
-; CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uabdl2 v0.8h, v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vabdl_high_u16:
-; CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uabdl2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vabdl_high_u32:
-; CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uabdl2 v0.2d, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vabal_high_s8:
-; CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sabal2 v0.8h, v1.16b, v2.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vabal_high_s16:
-; CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sabal2 v0.4s, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vabal_high_s32:
-; CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sabal2 v0.2d, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vabal_high_u8:
-; CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uabal2 v0.8h, v1.16b, v2.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vabal_high_u16:
-; CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uabal2 v0.4s, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vabal_high_u32:
-; CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uabal2 v0.2d, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vmull_s8:
-; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: ret
entry:
%vmull.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
ret <8 x i16> %vmull.i
define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vmull_s16:
-; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: ret
entry:
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
ret <4 x i32> %vmull2.i
define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vmull_s32:
-; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: ret
entry:
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
ret <2 x i64> %vmull2.i
define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vmull_u8:
-; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: ret
entry:
%vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
ret <8 x i16> %vmull.i
define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vmull_u16:
-; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: ret
entry:
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
ret <4 x i32> %vmull2.i
define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vmull_u32:
-; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: ret
entry:
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
ret <2 x i64> %vmull2.i
define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmull_high_s8:
-; CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smull2 v0.8h, v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vmull_high_s16:
-; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmull_high_s32:
-; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmull_high_u8:
-; CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umull2 v0.8h, v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vmull_high_u16:
-; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmull_high_u32:
-; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlal_s8:
-; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlal v0.8h, v1.8b, v2.8b
+; CHECK-NEXT: ret
entry:
%vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
%add.i = add <8 x i16> %vmull.i.i, %a
define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlal_s16:
-; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlal v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: ret
entry:
%vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
%add.i = add <4 x i32> %vmull2.i.i, %a
define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlal_s32:
-; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlal v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: ret
entry:
%vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
%add.i = add <2 x i64> %vmull2.i.i, %a
define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlal_u8:
-; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlal v0.8h, v1.8b, v2.8b
+; CHECK-NEXT: ret
entry:
%vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
%add.i = add <8 x i16> %vmull.i.i, %a
define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlal_u16:
-; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlal v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: ret
entry:
%vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
%add.i = add <4 x i32> %vmull2.i.i, %a
define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlal_u32:
-; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlal v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: ret
entry:
%vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
%add.i = add <2 x i64> %vmull2.i.i, %a
define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlal_high_s8:
-; CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlal2 v0.8h, v1.16b, v2.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlal_high_s16:
-; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlal_high_s32:
-; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlal_high_u8:
-; CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlal2 v0.8h, v1.16b, v2.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlal_high_u16:
-; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlal_high_u32:
-; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlsl_s8:
-; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlsl v0.8h, v1.8b, v2.8b
+; CHECK-NEXT: ret
entry:
%vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
%sub.i = sub <8 x i16> %a, %vmull.i.i
define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlsl_s16:
-; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: ret
entry:
%vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
%sub.i = sub <4 x i32> %a, %vmull2.i.i
define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlsl_s32:
-; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: ret
entry:
%vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
%sub.i = sub <2 x i64> %a, %vmull2.i.i
define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlsl_u8:
-; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlsl v0.8h, v1.8b, v2.8b
+; CHECK-NEXT: ret
entry:
%vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
%sub.i = sub <8 x i16> %a, %vmull.i.i
define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlsl_u16:
-; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: ret
entry:
%vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
%sub.i = sub <4 x i32> %a, %vmull2.i.i
define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlsl_u32:
-; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: ret
entry:
%vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
%sub.i = sub <2 x i64> %a, %vmull2.i.i
define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlsl_high_s8:
-; CHECK: smlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlsl2 v0.8h, v1.16b, v2.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlsl_high_s16:
-; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlsl_high_s32:
-; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlsl_high_u8:
-; CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlsl2 v0.8h, v1.16b, v2.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlsl_high_u16:
-; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlsl_high_u32:
-; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vqdmull_s16:
-; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: ret
entry:
%vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
ret <4 x i32> %vqdmull2.i
define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vqdmull_s32:
-; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: ret
entry:
%vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
ret <2 x i64> %vqdmull2.i
define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqdmlal_s16:
-; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmlal v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: ret
entry:
%vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
%vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqdmlal_s32:
-; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmlal v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: ret
entry:
%vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
%vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqdmlsl_s16:
-; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmlsl v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: ret
entry:
%vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
%vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqdmlsl_s32:
-; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmlsl v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: ret
entry:
%vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
%vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
ret <2 x i64> %vqdmlsl4.i
}
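; The _high variants take the upper halves of 128-bit sources. The shufflevector
; extracts of lanes 4-7 (or 2-3) feeding sqdmull select to the "2" form of the
; instruction, so no explicit lane extraction is emitted.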
define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmull_high_s16:
-; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmull_high_s32:
-; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
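; Same pattern with accumulation: high-half extracts plus sqdmull/sqadd fold
; into sqdmlal2.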
define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vqdmlal_high_s16:
-; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vqdmlal_high_s32:
-; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
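; And the subtracting form: high-half extracts plus sqdmull/sqsub fold into
; sqdmlsl2.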
define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vqdmlsl_high_s16:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vqdmlsl_high_s32:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
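; pmull is the polynomial (carry-less) multiply of 8-bit elements widening to
; 16 bits; pmull2 is the high-half form operating on the upper eight bytes.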
define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vmull_p8:
-; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: pmull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: ret
entry:
%vmull.i = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b)
ret <8 x i16> %vmull.i
}
define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmull_high_p8:
-; CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
}
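; The 64x64->128-bit carry-less multiply returns an i128 in x0/x1: the scalar
; inputs are moved into d0/d1, pmull on the .1d operands produces a .1q result,
; and the two halves are moved back out through fmov/mov. This form presumably
; relies on the crypto/AES target feature carried by attribute group #4, which
; is defined elsewhere in the file.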
define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
-; CHECK-LABEL: test_vmull_p64
-; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d
+; CHECK-LABEL: test_vmull_p64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: fmov d1, x1
+; CHECK-NEXT: pmull v0.1q, v0.1d, v1.1d
+; CHECK-NEXT: mov x1, v0.d[1]
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
entry:
%vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b)
%vmull3.i = bitcast <16 x i8> %vmull2.i to i128
ret i128 %vmull3.i
}
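; The high-half p64 variant extracts lane 1 of each source in IR, so selection
; can use pmull2 on the full 2d vectors instead of moving the lanes out first.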
define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
-; CHECK-LABEL: test_vmull_high_p64
-; CHECK: pmull2 {{v[0-9]+}}.1q, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: test_vmull_high_p64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d
+; CHECK-NEXT: mov x1, v0.d[1]
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
entry:
%0 = extractelement <2 x i64> %a, i32 1
%1 = extractelement <2 x i64> %b, i32 1
%vmull2.i.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %0, i64 %1)
%vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128
ret i128 %vmull3.i.i
}
-declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5
-