{ X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
{ X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
{ X86::VMOVNTPSZmr, X86::VMOVNTPDZmr, X86::VMOVNTDQZmr },
+ { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128r, X86::VPBROADCASTDZ128r },
+ { X86::VBROADCASTSSZ128m, X86::VBROADCASTSSZ128m, X86::VPBROADCASTDZ128m },
+ { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256r, X86::VPBROADCASTDZ256r },
+ { X86::VBROADCASTSSZ256m, X86::VBROADCASTSSZ256m, X86::VPBROADCASTDZ256m },
+ { X86::VBROADCASTSSZr, X86::VBROADCASTSSZr, X86::VPBROADCASTDZr },
+ { X86::VBROADCASTSSZm, X86::VBROADCASTSSZm, X86::VPBROADCASTDZm },
+ { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256r, X86::VPBROADCASTQZ256r },
+ { X86::VBROADCASTSDZ256m, X86::VBROADCASTSDZ256m, X86::VPBROADCASTQZ256m },
+ { X86::VBROADCASTSDZr, X86::VBROADCASTSDZr, X86::VPBROADCASTQZr },
+ { X86::VBROADCASTSDZm, X86::VBROADCASTSDZm, X86::VPBROADCASTQZm },
};
static const uint16_t ReplaceableInstrsAVX2[][3] = {
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: vmovups (%rbx), %zmm0
; CHECK-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill
-; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1
-; CHECK-NEXT: vmovdqa32 %zmm1, (%rbx)
+; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
+; CHECK-NEXT: vmovaps %zmm1, (%rbx)
; CHECK-NEXT: callq _Print__512
; CHECK-NEXT: vmovups (%rsp), %zmm0 ## 64-byte Reload
; CHECK-NEXT: callq _Print__512
-; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0
-; CHECK-NEXT: vmovdqa32 %zmm0, (%rbx)
+; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %zmm0
+; CHECK-NEXT: vmovaps %zmm0, (%rbx)
; CHECK-NEXT: addq $112, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
define <8 x i64> @test_mm512_broadcastd_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm512_broadcastd_epi32:
; X32: # BB#0:
-; X32-NEXT: vpbroadcastd %xmm0, %zmm0
+; X32-NEXT: vbroadcastss %xmm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_broadcastd_epi32:
; X64: # BB#0:
-; X64-NEXT: vpbroadcastd %xmm0, %zmm0
+; X64-NEXT: vbroadcastss %xmm0, %zmm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%res0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <16 x i32> zeroinitializer
define <8 x i64> @test_mm512_broadcastq_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm512_broadcastq_epi64:
; X32: # BB#0:
-; X32-NEXT: vpbroadcastq %xmm0, %zmm0
+; X32-NEXT: vbroadcastsd %xmm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_broadcastq_epi64:
; X64: # BB#0:
-; X64-NEXT: vpbroadcastq %xmm0, %zmm0
+; X64-NEXT: vbroadcastsd %xmm0, %zmm0
; X64-NEXT: retq
%res = shufflevector <2 x i64> %a0, <2 x i64> undef, <8 x i32> zeroinitializer
ret <8 x i64> %res
define <16 x i32> @_xmm16xi32(<16 x i32> %a) {
; ALL-LABEL: _xmm16xi32:
; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastd %xmm0, %zmm0
+; ALL-NEXT: vbroadcastss %xmm0, %zmm0
; ALL-NEXT: retq
%b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
ret <16 x i32> %b
define <16 x i32> @_invec8xi32(<8 x i32>%a) {
; ALL-LABEL: _invec8xi32:
; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastd %xmm0, %zmm0
+; ALL-NEXT: vbroadcastss %xmm0, %zmm0
; ALL-NEXT: retq
%res = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> zeroinitializer
ret <16 x i32>%res
define <8 x i64> @_invec4xi64(<4 x i64>%a) {
; ALL-LABEL: _invec4xi64:
; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastq %xmm0, %zmm0
+; ALL-NEXT: vbroadcastsd %xmm0, %zmm0
; ALL-NEXT: retq
%res = shufflevector <4 x i64> %a, <4 x i64> undef, <8 x i32> zeroinitializer
ret <8 x i64>%res
define <2 x i64> @test_mm_broadcastd_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_broadcastd_epi32:
; X32: # BB#0:
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
+; X32-NEXT: vbroadcastss %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_broadcastd_epi32:
; X64: # BB#0:
-; X64-NEXT: vpbroadcastd %xmm0, %xmm0
+; X64-NEXT: vbroadcastss %xmm0, %xmm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%res0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
define <4 x i64> @test_mm256_broadcastd_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm256_broadcastd_epi32:
; X32: # BB#0:
-; X32-NEXT: vpbroadcastd %xmm0, %ymm0
+; X32-NEXT: vbroadcastss %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_broadcastd_epi32:
; X64: # BB#0:
-; X64-NEXT: vpbroadcastd %xmm0, %ymm0
+; X64-NEXT: vbroadcastss %xmm0, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%res0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <8 x i32> zeroinitializer
define <4 x i64> @test_mm256_broadcastq_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm256_broadcastq_epi64:
; X32: # BB#0:
-; X32-NEXT: vpbroadcastq %xmm0, %ymm0
+; X32-NEXT: vbroadcastsd %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_broadcastq_epi64:
; X64: # BB#0:
-; X64-NEXT: vpbroadcastq %xmm0, %ymm0
+; X64-NEXT: vbroadcastsd %xmm0, %ymm0
; X64-NEXT: retq
%res = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> zeroinitializer
ret <4 x i64> %res
; KNL_32-LABEL: test11:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %zmm1
+; KNL_32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %zmm1
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; KNL_32-NEXT: retl
; SKX_32-LABEL: test11:
; SKX_32: # BB#0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %zmm1
+; SKX_32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %zmm1
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; SKX_32-NEXT: retl
; X32_AVX512VL-LABEL: fabs_v4f64:
; X32_AVX512VL: # BB#0:
; X32_AVX512VL-NEXT: vbroadcastsd {{\.LCPI.*}}, %ymm1
-; X32_AVX512VL-NEXT: vandpd %ymm1, %ymm0, %ymm0
+; X32_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v4f64:
; X64_AVX512VL-LABEL: fabs_v4f64:
; X64_AVX512VL: # BB#0:
; X64_AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1
-; X64_AVX512VL-NEXT: vandpd %ymm1, %ymm0, %ymm0
+; X64_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v4f64:
;
; AVX512VL-LABEL: shuffle_v4i64_0000:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0
+; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i64> %shuffle
}
define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
-; AVX1-LABEL: splat_mem_v4i64:
-; AVX1: # BB#0:
-; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splat_mem_v4i64:
-; AVX2: # BB#0:
-; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: splat_mem_v4i64:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: splat_mem_v4i64:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
+; ALL-NEXT: retq
%a = load i64, i64* %ptr
%v = insertelement <4 x i64> undef, i64 %a, i64 0
%shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
}
define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
-; AVX1-LABEL: splat_mem_v4i64_from_v2i64:
-; AVX1: # BB#0:
-; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splat_mem_v4i64_from_v2i64:
-; AVX2: # BB#0:
-; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: splat_mem_v4i64_from_v2i64:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: splat_mem_v4i64_from_v2i64:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
+; ALL-NEXT: retq
%v = load <2 x i64>, <2 x i64>* %ptr
%shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i64> %shuffle
}
define <4 x i64> @insert_dup_mem_v4i64(i64* %ptr) {
-; AVX1-LABEL: insert_dup_mem_v4i64:
-; AVX1: # BB#0:
-; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: insert_dup_mem_v4i64:
-; AVX2: # BB#0:
-; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: insert_dup_mem_v4i64:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: insert_dup_mem_v4i64:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
+; ALL-NEXT: retq
%tmp = load i64, i64* %ptr, align 1
%tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
%tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <4 x i32> zeroinitializer
define <16 x i32> @shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastd %xmm0, %zmm0
+; ALL-NEXT: vbroadcastss %xmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i32> %shuffle
;
; AVX512F-LABEL: shuffle_v8i64_00000000:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0
+; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_00000000:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0
+; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i64> %shuffle
define <16 x i32> @combine_permvar_as_vpbroadcastd512(<16 x i32> %x0) {
; X32-LABEL: combine_permvar_as_vpbroadcastd512:
; X32: # BB#0:
-; X32-NEXT: vpbroadcastd %xmm0, %zmm0
+; X32-NEXT: vbroadcastss %xmm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_permvar_as_vpbroadcastd512:
; X64: # BB#0:
-; X64-NEXT: vpbroadcastd %xmm0, %zmm0
+; X64-NEXT: vbroadcastss %xmm0, %zmm0
; X64-NEXT: retq
%1 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> zeroinitializer, <16 x i32> undef, i16 -1)
ret <16 x i32> %1
;
; X64-LABEL: combine_permvar_as_vpbroadcastq512:
; X64: # BB#0:
-; X64-NEXT: vpbroadcastq %xmm0, %zmm0
+; X64-NEXT: vbroadcastsd %xmm0, %zmm0
; X64-NEXT: retq
%1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> zeroinitializer, <8 x i64> undef, i8 -1)
ret <8 x i64> %1