define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
; CHECK-LABEL: dupzext_v2i16_v2i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xffff
+; CHECK-NEXT: movi d1, #0x00ffff0000ffff
+; CHECK-NEXT: dup v2.2d, x8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: fmov x9, d0
-; CHECK-NEXT: mov x10, v0.d[1]
-; CHECK-NEXT: mul x9, x8, x9
-; CHECK-NEXT: mul x8, x8, x10
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: xtn v2.2s, v2.2d
+; CHECK-NEXT: umull v0.2d, v2.2s, v0.2s
; CHECK-NEXT: ret
entry:
%in = zext i16 %src to i64
define <8 x i16> @typei1_v8i1_v8i16(i1 %src, <8 x i1> %b) {
; CHECK-LABEL: typei1_v8i1_v8i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v1.8b, #1
; CHECK-NEXT: and w8, w0, #0x1
+; CHECK-NEXT: movi v1.8b, #1
+; CHECK-NEXT: dup v2.8h, w8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: dup v1.8h, w8
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: mul v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: xtn v2.8b, v2.8h
+; CHECK-NEXT: umull v0.8h, v2.8b, v0.8b
; CHECK-NEXT: ret
entry:
%in = zext i1 %src to i16
define <8 x i16> @umull_and_v8i16(<8 x i8> %src1, <8 x i16> %src2) {
; CHECK-LABEL: umull_and_v8i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: bic v1.8h, #255, lsl #8
-; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: xtn v1.8b, v1.8h
+; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
; CHECK-NEXT: ret
entry:
%in1 = zext <8 x i8> %src1 to <8 x i16>
define <8 x i16> @umull_and_v8i16_c(<8 x i8> %src1, <8 x i16> %src2) {
; CHECK-LABEL: umull_and_v8i16_c:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: bic v1.8h, #255, lsl #8
-; CHECK-NEXT: mul v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: xtn v1.8b, v1.8h
+; CHECK-NEXT: umull v0.8h, v1.8b, v0.8b
; CHECK-NEXT: ret
entry:
%in1 = zext <8 x i8> %src1 to <8 x i16>
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v2.8b, #15
; CHECK-NEXT: bic v1.8h, #255, lsl #8
+; CHECK-NEXT: xtn v1.8b, v1.8h
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
; CHECK-NEXT: ret
entry:
%in1 = zext <8 x i4> %src1 to <8 x i16>
; CHECK-LABEL: umull_and_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v2.2d, #0x0000ff000000ff
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: xtn v1.4h, v1.4s
+; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
; CHECK-NEXT: ret
entry:
%in1 = zext <4 x i16> %src1 to <4 x i32>
; CHECK-LABEL: umull_and_v8i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v3.2d, #0x0000ff000000ff
-; CHECK-NEXT: ushll v4.4s, v0.4h, #0
-; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-NEXT: and v5.16b, v1.16b, v3.16b
-; CHECK-NEXT: and v1.16b, v2.16b, v3.16b
-; CHECK-NEXT: mul v1.4s, v0.4s, v1.4s
-; CHECK-NEXT: mul v0.4s, v4.4s, v5.4s
+; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: and v2.16b, v2.16b, v3.16b
+; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-NEXT: xtn v1.4h, v1.4s
+; CHECK-NEXT: xtn v2.4h, v2.4s
+; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: umull v1.4s, v4.4h, v2.4h
; CHECK-NEXT: ret
entry:
%in1 = zext <8 x i16> %src1 to <8 x i32>
; CHECK-LABEL: umull_and_v8i32_dup:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: ushll v1.4s, v0.4h, #0
-; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: dup v2.4s, w8
-; CHECK-NEXT: mul v0.4s, v1.4s, v2.4s
-; CHECK-NEXT: mul v1.4s, v3.4s, v2.4s
+; CHECK-NEXT: xtn v2.4h, v2.4s
+; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
+; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
; CHECK-NEXT: ret
entry:
%in1 = zext <8 x i16> %src1 to <8 x i32>
; CHECK-LABEL: umull_and_v2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v2.2d, #0x000000000000ff
-; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: fmov x10, d0
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: fmov x9, d1
-; CHECK-NEXT: mov x8, v1.d[1]
-; CHECK-NEXT: mov x11, v0.d[1]
-; CHECK-NEXT: mul x9, x10, x9
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: xtn v1.2s, v1.2d
+; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
; CHECK-NEXT: ret
entry:
%in1 = zext <2 x i32> %src1 to <2 x i64>
; CHECK-LABEL: umull_and_v4i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v3.2d, #0x000000000000ff
-; CHECK-NEXT: ushll v4.2d, v0.2s, #0
-; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
-; CHECK-NEXT: fmov x14, d4
+; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-NEXT: fmov x11, d0
-; CHECK-NEXT: mov x9, v0.d[1]
-; CHECK-NEXT: and v0.16b, v1.16b, v3.16b
-; CHECK-NEXT: fmov x10, d2
-; CHECK-NEXT: fmov x13, d0
-; CHECK-NEXT: mov x8, v2.d[1]
-; CHECK-NEXT: mov x12, v0.d[1]
-; CHECK-NEXT: mul x10, x11, x10
-; CHECK-NEXT: mov x15, v4.d[1]
-; CHECK-NEXT: mul x11, x14, x13
-; CHECK-NEXT: mul x8, x9, x8
-; CHECK-NEXT: fmov d1, x10
-; CHECK-NEXT: mul x9, x15, x12
-; CHECK-NEXT: fmov d0, x11
-; CHECK-NEXT: mov v1.d[1], x8
-; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-NEXT: xtn v1.2s, v1.2d
+; CHECK-NEXT: xtn v2.2s, v2.2d
+; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: umull v1.2d, v4.2s, v2.2s
; CHECK-NEXT: ret
entry:
%in1 = zext <4 x i32> %src1 to <4 x i64>
define <4 x i64> @umull_and_v4i64_dup(<4 x i32> %src1, i64 %src2) {
; CHECK-LABEL: umull_and_v4i64_dup:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0
; CHECK-NEXT: and x8, x0, #0xff
-; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: fmov x9, d1
-; CHECK-NEXT: fmov x11, d0
-; CHECK-NEXT: mov x10, v1.d[1]
-; CHECK-NEXT: mov x12, v0.d[1]
-; CHECK-NEXT: mul x9, x9, x8
-; CHECK-NEXT: mul x11, x11, x8
-; CHECK-NEXT: mul x10, x10, x8
-; CHECK-NEXT: mul x8, x12, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: fmov d0, x11
-; CHECK-NEXT: mov v1.d[1], x10
-; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: dup v2.2d, x8
+; CHECK-NEXT: xtn v2.2s, v2.2d
+; CHECK-NEXT: umull v0.2d, v0.2s, v2.2s
+; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
; CHECK-NEXT: ret
entry:
%in1 = zext <4 x i32> %src1 to <4 x i64>