From e430de8be62e2789d6fe7f3c8ef23f00462c9440 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 1 May 2016 06:52:19 +0000 Subject: [PATCH] [AVX512] Prefer AVX512 VPACK instructions over AVX/AVX2 instructions when VLX and BWI are supported. llvm-svn: 268189 --- llvm/lib/Target/X86/X86InstrSSE.td | 6 +++--- llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 2f6074f..da559d3 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -4451,7 +4451,7 @@ multiclass sse4_pack_y opc, string OpcodeStr, ValueType OutVT, Sched<[WriteShuffleLd, ReadAfterLd]>; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, bc_v8i16, loadv2i64, 0>, VEX_4V; defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, @@ -4463,7 +4463,7 @@ let Predicates = [HasAVX] in { bc_v4i32, loadv2i64, 0>, VEX_4V; } -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss, bc_v16i16>, VEX_4V, VEX_L; defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, @@ -4484,7 +4484,7 @@ let Constraints = "$src1 = $dst" in { defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, bc_v8i16, memopv2i64>; - let Predicates = [HasSSE41] in + let Predicates = [UseSSE41] in defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, bc_v4i32, memopv2i64>; } diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll index cc686db..0f9f071 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -2055,7 +2055,7 @@ declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ;CHECK-LABEL: test_mask_packs_epi32_rr_128 - ;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0xc1] + ;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6b,0xc1] %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) ret <8 x i16> %res } @@ -2076,7 +2076,7 @@ define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ;CHECK-LABEL: test_mask_packs_epi32_rm_128 - ;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0x07] + ;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6b,0x07] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) ret <8 x i16> %res @@ -2132,7 +2132,7 @@ declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ;CHECK-LABEL: test_mask_packs_epi32_rr_256 - ;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0xc1] + ;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6b,0xc1] %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) ret <16 x i16> %res } @@ -2153,7 +2153,7 @@ define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i1 define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ;CHECK-LABEL: test_mask_packs_epi32_rm_256 - ;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0x07] + ;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6b,0x07] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) ret <16 x i16> %res @@ -2209,7 +2209,7 @@ declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { ;CHECK-LABEL: test_mask_packs_epi16_rr_128 - ;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc1] + ;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x63,0xc1] %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) ret <16 x i8> %res } @@ -2230,7 +2230,7 @@ define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { ;CHECK-LABEL: test_mask_packs_epi16_rm_128 - ;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0x07] + ;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x63,0x07] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) ret <16 x i8> %res @@ -2256,7 +2256,7 @@ declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { ;CHECK-LABEL: test_mask_packs_epi16_rr_256 - ;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0xc1] + ;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x63,0xc1] %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) ret <32 x i8> %res } @@ -2277,7 +2277,7 @@ define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { ;CHECK-LABEL: test_mask_packs_epi16_rm_256 - ;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0x07] + ;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x63,0x07] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) ret <32 x i8> %res -- 2.7.4