; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
; Codegen test on <4 x i16> operands: the checks expect the function to lower
; to the unsigned high-half multiply pmulhuw (vpmulhuw on AVX targets) applied
; directly to the xmm argument registers.
-define <4 x i16> @mulhuw_v4i16(<4 x i16> %a, <4 x i16> %b) {
-; SSE-LABEL: mulhuw_v4i16:
+define <4 x i16> @zext_mulhuw_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; SSE-LABEL: zext_mulhuw_v4i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhuw %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhuw_v4i16:
+; AVX-LABEL: zext_mulhuw_v4i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
ret <4 x i16> %e
}
; Codegen test on <4 x i16> operands: the checks expect the function to lower
; to the signed high-half multiply pmulhw (vpmulhw on AVX targets) applied
; directly to the xmm argument registers.
-define <4 x i16> @mulhw_v4i16(<4 x i16> %a, <4 x i16> %b) {
-; SSE-LABEL: mulhw_v4i16:
+define <4 x i16> @sext_mulhw_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; SSE-LABEL: sext_mulhw_v4i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhw %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhw_v4i16:
+; AVX-LABEL: sext_mulhw_v4i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
ret <4 x i16> %e
}
; Codegen test on full 128-bit <8 x i16> operands: the checks expect the
; unsigned high-half multiply pmulhuw (vpmulhuw on AVX targets) on the xmm
; argument registers.
-define <8 x i16> @mulhuw_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; SSE-LABEL: mulhuw_v8i16:
+define <8 x i16> @zext_mulhuw_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: zext_mulhuw_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhuw %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhuw_v8i16:
+; AVX-LABEL: zext_mulhuw_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
ret <8 x i16> %e
}
; Codegen test on full 128-bit <8 x i16> operands: the checks expect the
; signed high-half multiply pmulhw (vpmulhw on AVX targets) on the xmm
; argument registers.
-define <8 x i16> @mulhw_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; SSE-LABEL: mulhw_v8i16:
+define <8 x i16> @sext_mulhw_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: sext_mulhw_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhw %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhw_v8i16:
+; AVX-LABEL: sext_mulhw_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
ret <8 x i16> %e
}
; Codegen test on 256-bit <16 x i16> operands: SSE targets are expected to
; issue pmulhuw once per 128-bit half (xmm0/xmm2 and xmm1/xmm3), while AVX
; targets are expected to use one vpmulhuw on the ymm registers.
-define <16 x i16> @mulhuw_v16i16(<16 x i16> %a, <16 x i16> %b) {
-; SSE-LABEL: mulhuw_v16i16:
+define <16 x i16> @zext_mulhuw_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: zext_mulhuw_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhuw %xmm2, %xmm0
; SSE-NEXT: pmulhuw %xmm3, %xmm1
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhuw_v16i16:
+; AVX-LABEL: zext_mulhuw_v16i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
ret <16 x i16> %e
}
; Codegen test on 256-bit <16 x i16> operands: SSE targets are expected to
; issue the signed pmulhw once per 128-bit half, while AVX targets are
; expected to use one vpmulhw on the ymm registers.
; NOTE(review): the function name says "mulhuw" (unsigned) but every check
; expects the signed pmulhw/vpmulhw; "sext_mulhw_v16i16" would match the
; naming of sext_mulhw_v4i16 - confirm before renaming.
-define <16 x i16> @mulhw_v16i16(<16 x i16> %a, <16 x i16> %b) {
-; SSE-LABEL: mulhw_v16i16:
+define <16 x i16> @sext_mulhuw_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: sext_mulhuw_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhw %xmm2, %xmm0
; SSE-NEXT: pmulhw %xmm3, %xmm1
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhw_v16i16:
+; AVX-LABEL: sext_mulhuw_v16i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
ret <16 x i16> %e
}
; Codegen test on 512-bit <32 x i16> operands. Expected lowering per target,
; as pinned by the checks: SSE uses pmulhuw on xmm register pairs, AVX2 uses
; vpmulhuw on two ymm pairs, and the AVX512BW run uses one zmm vpmulhuw.
; The AVX512F-only run instead checks vextracti64x4/vinserti64x4 around the
; multiply, i.e. the zmm values are split into ymm halves and recombined
; (presumably because the 512-bit form of vpmulhuw needs AVX512BW).
-define <32 x i16> @mulhuw_v32i16(<32 x i16> %a, <32 x i16> %b) {
-; SSE-LABEL: mulhuw_v32i16:
+define <32 x i16> @zext_mulhuw_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; SSE-LABEL: zext_mulhuw_v32i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhuw %xmm4, %xmm0
; SSE-NEXT: pmulhuw %xmm5, %xmm1
; SSE-NEXT: pmulhuw %xmm7, %xmm3
; SSE-NEXT: retq
;
-; AVX2-LABEL: mulhuw_v32i16:
+; AVX2-LABEL: zext_mulhuw_v32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhuw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpmulhuw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: mulhuw_v32i16:
+; AVX512F-LABEL: zext_mulhuw_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
-; AVX512BW-LABEL: mulhuw_v32i16:
+; AVX512BW-LABEL: zext_mulhuw_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
ret <32 x i16> %e
}
; Codegen test on 512-bit <32 x i16> operands using the signed high-half
; multiply. Expected lowering per target, as pinned by the checks: SSE uses
; pmulhw on xmm register pairs, AVX2 uses vpmulhw on two ymm pairs, and the
; AVX512BW run uses one zmm vpmulhw. The AVX512F-only run checks
; vextracti64x4/vinserti64x4, i.e. the zmm values are split and recombined.
; NOTE(review): the function name says "mulhuw" (unsigned) but every check
; expects the signed pmulhw/vpmulhw - confirm whether the name is intended.
-define <32 x i16> @mulhw_v32i16(<32 x i16> %a, <32 x i16> %b) {
-; SSE-LABEL: mulhw_v32i16:
+define <32 x i16> @sext_mulhuw_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; SSE-LABEL: sext_mulhuw_v32i16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhw %xmm4, %xmm0
; SSE-NEXT: pmulhw %xmm5, %xmm1
; SSE-NEXT: pmulhw %xmm7, %xmm3
; SSE-NEXT: retq
;
-; AVX2-LABEL: mulhw_v32i16:
+; AVX2-LABEL: sext_mulhuw_v32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpmulhw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: mulhw_v32i16:
+; AVX512F-LABEL: sext_mulhuw_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
-; AVX512BW-LABEL: mulhw_v32i16:
+; AVX512BW-LABEL: sext_mulhuw_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
ret <32 x i16> %e
}
; Codegen test on 1024-bit <64 x i16> operands with the unsigned high-half
; multiply. The SSE checks copy %rdi to %rax, take a pmulhuw operand from the
; stack (an %rsp-relative address) and store the product through %rdi. AVX2
; checks use ymm vpmulhuw, the AVX512BW run uses two zmm vpmulhuw, and the
; AVX512F-only run checks vextracti64x4/vinserti64x4 splitting of the zmm
; values.
-define <64 x i16> @mulhuw_v64i16(<64 x i16> %a, <64 x i16> %b) {
-; SSE-LABEL: mulhuw_v64i16:
+define <64 x i16> @zext_mulhuw_v64i16(<64 x i16> %a, <64 x i16> %b) {
+; SSE-LABEL: zext_mulhuw_v64i16:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: pmulhuw {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: movdqa %xmm0, (%rdi)
; SSE-NEXT: retq
;
-; AVX2-LABEL: mulhuw_v64i16:
+; AVX2-LABEL: zext_mulhuw_v64i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhuw %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpmulhuw %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vpmulhuw %ymm7, %ymm3, %ymm3
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: mulhuw_v64i16:
+; AVX512F-LABEL: zext_mulhuw_v64i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512F-NEXT: retq
;
-; AVX512BW-LABEL: mulhuw_v64i16:
+; AVX512BW-LABEL: zext_mulhuw_v64i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhuw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpmulhuw %zmm3, %zmm1, %zmm1
ret <64 x i16> %e
}
; Codegen test on 1024-bit <64 x i16> operands with the signed high-half
; multiply. The SSE checks copy %rdi to %rax, take a pmulhw operand from the
; stack (an %rsp-relative address) and store the product through %rdi. AVX2
; checks use ymm vpmulhw, the AVX512BW run uses two zmm vpmulhw, and the
; AVX512F-only run checks vextracti64x4/vinserti64x4 splitting of the zmm
; values.
; NOTE(review): the function name says "mulhuw" (unsigned) but every check
; expects the signed pmulhw/vpmulhw - confirm whether the name is intended.
-define <64 x i16> @mulhw_v64i16(<64 x i16> %a, <64 x i16> %b) {
-; SSE-LABEL: mulhw_v64i16:
+define <64 x i16> @sext_mulhuw_v64i16(<64 x i16> %a, <64 x i16> %b) {
+; SSE-LABEL: sext_mulhuw_v64i16:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: movdqa %xmm0, (%rdi)
; SSE-NEXT: retq
;
-; AVX2-LABEL: mulhw_v64i16:
+; AVX2-LABEL: sext_mulhuw_v64i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhw %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpmulhw %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vpmulhw %ymm7, %ymm3, %ymm3
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: mulhw_v64i16:
+; AVX512F-LABEL: sext_mulhuw_v64i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512F-NEXT: retq
;
-; AVX512BW-LABEL: mulhw_v64i16:
+; AVX512BW-LABEL: sext_mulhuw_v64i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpmulhw %zmm3, %zmm1, %zmm1
ret <64 x i16> %e
}
; Codegen test on <8 x i16> operands: the checks expect the same unsigned
; pmulhuw/vpmulhuw lowering as the plain v8i16 case.
; NOTE(review): the "_i64" suffix presumably means the IR widens through i64
; rather than i32 - confirm against the function body.
-define <8 x i16> @mulhuw_v8i16_i64(<8 x i16> %a, <8 x i16> %b) {
-; SSE-LABEL: mulhuw_v8i16_i64:
+define <8 x i16> @zext_mulhuw_v8i16_i64(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: zext_mulhuw_v8i16_i64:
; SSE: # %bb.0:
; SSE-NEXT: pmulhuw %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhuw_v8i16_i64:
+; AVX-LABEL: zext_mulhuw_v8i16_i64:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
ret <8 x i16> %e
}
; Codegen test on <8 x i16> operands: the checks expect the signed
; pmulhw/vpmulhw lowering on the xmm argument registers.
; NOTE(review): the function name says "mulhuw" (unsigned) but the checks
; expect the signed pmulhw/vpmulhw - confirm whether the name is intended.
-define <8 x i16> @mulhw_v8i16_i64(<8 x i16> %a, <8 x i16> %b) {
-; SSE-LABEL: mulhw_v8i16_i64:
+define <8 x i16> @sext_mulhuw_v8i16_i64(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: sext_mulhuw_v8i16_i64:
; SSE: # %bb.0:
; SSE-NEXT: pmulhw %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: mulhw_v8i16_i64:
+; AVX-LABEL: sext_mulhuw_v8i16_i64:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
ret <8 x i16> %e
}
; Codegen test returning <4 x i32> from <4 x i16> operands: the checks expect
; pmulhuw followed by a zero-extension of the low four words to i32 lanes.
; SSE2 does the widening with pxor + punpcklwd against a zeroed register;
; SSE4.1 and AVX targets use pmovzxwd/vpmovzxwd.
-define <4 x i32> @mulhuw_v4i16_lshr(<4 x i16> %a, <4 x i16> %b) {
-; SSE2-LABEL: mulhuw_v4i16_lshr:
+define <4 x i32> @zext_mulhuw_v4i16_lshr(<4 x i16> %a, <4 x i16> %b) {
+; SSE2-LABEL: zext_mulhuw_v4i16_lshr:
; SSE2: # %bb.0:
; SSE2-NEXT: pmulhuw %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhuw_v4i16_lshr:
+; SSE41-LABEL: zext_mulhuw_v4i16_lshr:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulhuw %xmm1, %xmm0
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: retq
;
-; AVX-LABEL: mulhuw_v4i16_lshr:
+; AVX-LABEL: zext_mulhuw_v4i16_lshr:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
ret <4 x i32> %d
}
; Codegen test returning <8 x i32> from <8 x i16> operands: every target is
; expected to use pmulhuw/vpmulhuw on the xmm inputs. The AVX checks then
; widen all eight product words to i32 lanes in a ymm register with vpmovzxwd;
; the SSE checks copy %xmm0 to %xmm2 first so the product lands in %xmm2.
-define <8 x i32> @mulhuw_v8i16_lshr(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: mulhuw_v8i16_lshr:
+define <8 x i32> @zext_mulhuw_v8i16_lshr(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: zext_mulhuw_v8i16_lshr:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pmulhuw %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhuw_v8i16_lshr:
+; SSE41-LABEL: zext_mulhuw_v8i16_lshr:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pmulhuw %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm1
; SSE41-NEXT: retq
;
-; AVX-LABEL: mulhuw_v8i16_lshr:
+; AVX-LABEL: zext_mulhuw_v8i16_lshr:
; AVX: # %bb.0:
; AVX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
ret <8 x i32> %d
}
; Codegen test returning <16 x i32> from <16 x i16> operands: the AVX2 and
; AVX512 checks expect vpmulhuw on the ymm arguments followed by vpmovzxwd to
; widen the product words to i32 lanes (two ymm results on AVX2, one zmm
; result on AVX512).
-define <16 x i32> @mulhuw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
-; SSE2-LABEL: mulhuw_v16i16_lshr:
+define <16 x i32> @zext_mulhuw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-LABEL: zext_mulhuw_v16i16_lshr:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm4, %xmm3
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhuw_v16i16_lshr:
+; SSE41-LABEL: zext_mulhuw_v16i16_lshr:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm1, %xmm4
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm4, %xmm3
; SSE41-NEXT: retq
;
-; AVX2-LABEL: mulhuw_v16i16_lshr:
+; AVX2-LABEL: zext_mulhuw_v16i16_lshr:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhuw %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT: retq
;
-; AVX512-LABEL: mulhuw_v16i16_lshr:
+; AVX512-LABEL: zext_mulhuw_v16i16_lshr:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
ret <16 x i32> %d
}
; Codegen test returning <32 x i32> from <32 x i16> operands: every target is
; expected to use pmulhuw/vpmulhuw. The AVX2 and AVX512 checks additionally
; expect vpmovzxwd to widen the product words to i32 lanes. The SSE checks
; copy %rdi to %rax and store results through %rdi. The AVX512F-only run
; multiplies at ymm width, whereas the AVX512BW run multiplies at zmm width.
-define <32 x i32> @mulhuw_v32i16_lshr(<32 x i16> %a, <32 x i16> %b) {
-; SSE2-LABEL: mulhuw_v32i16_lshr:
+define <32 x i32> @zext_mulhuw_v32i16_lshr(<32 x i16> %a, <32 x i16> %b) {
+; SSE2-LABEL: zext_mulhuw_v32i16_lshr:
; SSE2: # %bb.0:
; SSE2-NEXT: movq %rdi, %rax
; SSE2-NEXT: pmulhuw %xmm7, %xmm3
; SSE2-NEXT: movdqa %xmm8, (%rdi)
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhuw_v32i16_lshr:
+; SSE41-LABEL: zext_mulhuw_v32i16_lshr:
; SSE41: # %bb.0:
; SSE41-NEXT: movq %rdi, %rax
; SSE41-NEXT: pmulhuw %xmm4, %xmm0
; SSE41-NEXT: movdqa %xmm8, (%rdi)
; SSE41-NEXT: retq
;
-; AVX2-LABEL: mulhuw_v32i16_lshr:
+; AVX2-LABEL: zext_mulhuw_v32i16_lshr:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhuw %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX2-NEXT: vmovdqa %ymm4, %ymm1
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: mulhuw_v32i16_lshr:
+; AVX512F-LABEL: zext_mulhuw_v32i16_lshr:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT: retq
;
-; AVX512BW-LABEL: mulhuw_v32i16_lshr:
+; AVX512BW-LABEL: zext_mulhuw_v32i16_lshr:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm1
; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
ret <32 x i32> %d
}
; Codegen test returning <64 x i32> from <64 x i16> operands: the AVX2 and
; AVX512 checks expect vpmulhuw, and the AVX512 checks also expect vpmovzxwd
; to widen the product words to i32 lanes. The SSE and AVX2 checks copy %rdi
; to %rax, and the SSE checks store through %rdi; the AVX2 checks end with
; vzeroupper before returning.
-define <64 x i32> @mulhuw_v64i16_lshr(<64 x i16> %a, <64 x i16> %b) {
-; SSE2-LABEL: mulhuw_v64i16_lshr:
+define <64 x i32> @zext_mulhuw_v64i16_lshr(<64 x i16> %a, <64 x i16> %b) {
+; SSE2-LABEL: zext_mulhuw_v64i16_lshr:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm7, %xmm8
; SSE2-NEXT: movq %rdi, %rax
; SSE2-NEXT: movaps %xmm0, (%rdi)
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhuw_v64i16_lshr:
+; SSE41-LABEL: zext_mulhuw_v64i16_lshr:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm8
; SSE41-NEXT: movq %rdi, %rax
; SSE41-NEXT: movdqa %xmm0, (%rdi)
; SSE41-NEXT: retq
;
-; AVX2-LABEL: mulhuw_v64i16_lshr:
+; AVX2-LABEL: zext_mulhuw_v64i16_lshr:
; AVX2: # %bb.0:
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: vpmulhuw %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: mulhuw_v64i16_lshr:
+; AVX512F-LABEL: zext_mulhuw_v64i16_lshr:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm4
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
; AVX512F-NEXT: vmovdqa64 %zmm5, %zmm1
; AVX512F-NEXT: retq
;
-; AVX512BW-LABEL: mulhuw_v64i16_lshr:
+; AVX512BW-LABEL: zext_mulhuw_v64i16_lshr:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmulhuw %zmm2, %zmm0, %zmm2
; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
ret <64 x i32> %d
}
; Codegen test returning <8 x i64> from <8 x i16> operands: the SSE4.1, AVX2
; and AVX512 checks expect pmulhuw/vpmulhuw on the xmm inputs followed by
; pmovzxwq/vpmovzxwq, i.e. the 16-bit products are zero-extended to i64 lanes
; (two ymm results on AVX2, one zmm result on AVX512).
-define <8 x i64> @mulhuw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: mulhuw_v8i16_lshr_i64:
+define <8 x i64> @zext_mulhuw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: zext_mulhuw_v8i16_lshr_i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: movdqa %xmm4, %xmm1
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhuw_v8i16_lshr_i64:
+; SSE41-LABEL: zext_mulhuw_v8i16_lshr_i64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulhuw %xmm1, %xmm0
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
-; AVX2-LABEL: mulhuw_v8i16_lshr_i64:
+; AVX2-LABEL: zext_mulhuw_v8i16_lshr_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT: retq
;
-; AVX512-LABEL: mulhuw_v8i16_lshr_i64:
+; AVX512-LABEL: zext_mulhuw_v8i16_lshr_i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
ret <8 x i64> %d
}
; Codegen test returning <8 x i64> from <8 x i16> operands: the SSE4.1, AVX2
; and AVX512 checks expect the signed pmulhw/vpmulhw followed by a ZERO
; extension of the 16-bit products to i64 lanes (pmovzxwq/vpmovzxwq).
; The SSE2 checks instead show a punpcklwd / psrad $16 / psrlq $16 sequence,
; so no pmulhw is pinned for that target in the lines shown.
-define <8 x i64> @mulhsw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: mulhsw_v8i16_lshr_i64:
+define <8 x i64> @sext_mulhsw_v8i16_lshr_i64(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: sext_mulhsw_v8i16_lshr_i64:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm6
; SSE2-NEXT: psrlq $16, %xmm3
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhsw_v8i16_lshr_i64:
+; SSE41-LABEL: sext_mulhsw_v8i16_lshr_i64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulhw %xmm1, %xmm0
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
-; AVX2-LABEL: mulhsw_v8i16_lshr_i64:
+; AVX2-LABEL: sext_mulhsw_v8i16_lshr_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhw %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX2-NEXT: retq
;
-; AVX512-LABEL: mulhsw_v8i16_lshr_i64:
+; AVX512-LABEL: sext_mulhsw_v8i16_lshr_i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
ret <8 x i64> %d
}
-define <8 x i64> @mulhsw_v8i16_ashr_i64(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: mulhsw_v8i16_ashr_i64:
+define <8 x i64> @sext_mulhsw_v8i16_ashr_i64(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: sext_mulhsw_v8i16_ashr_i64:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm5
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; SSE2-NEXT: retq
;
-; SSE41-LABEL: mulhsw_v8i16_ashr_i64:
+; SSE41-LABEL: sext_mulhsw_v8i16_ashr_i64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulhw %xmm1, %xmm0
; SSE41-NEXT: pmovsxwq %xmm0, %xmm4
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
-; AVX2-LABEL: mulhsw_v8i16_ashr_i64:
+; AVX2-LABEL: sext_mulhsw_v8i16_ashr_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhw %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpmovsxwq %xmm1, %ymm0
; AVX2-NEXT: vpmovsxwq %xmm1, %ymm1
; AVX2-NEXT: retq
;
-; AVX512-LABEL: mulhsw_v8i16_ashr_i64:
+; AVX512-LABEL: sext_mulhsw_v8i16_ashr_i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpmovsxwq %xmm0, %zmm0