From 5c842be9a08a5d35e0e7ce43efccbb1f8df12466 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 9 Nov 2016 04:50:48 +0000 Subject: [PATCH] [AVX-512] Make VBMI instruction set enabling imply that the BWI instruction set is also enabled. Summary: This is needed to make the v64i8 and v32i16 types legal for the 512-bit VBMI instructions. Fixes PR30912. Reviewers: delena, zvi Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D26322 llvm-svn: 286339 --- llvm/lib/Target/X86/X86.td | 4 +-- llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll | 18 ++++++------- llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll | 34 ++++++++++++------------ 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 9d48af9..e8fbe82 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -141,8 +141,8 @@ def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true", "Enable AVX-512 Vector Length eXtensions", [FeatureAVX512]>; def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true", - "Enable AVX-512 Vector Bit Manipulation Instructions", - [FeatureAVX512]>; + "Enable AVX-512 Vector Byte Manipulation Instructions", + [FeatureBWI]>; def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true", "Enable AVX-512 Integer Fused Multiple-Add", [FeatureAVX512]>; diff --git a/llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll index c3fb77f..4fcc3fe 100644 --- a/llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512vbmi | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vbmi | FileCheck %s declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) define <64 x i8>@test_int_x86_avx512_mask_permvar_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { @@ -9,8 +9,8 @@ define <64 x i8>@test_int_x86_avx512_mask_permvar_qi_512(<64 x i8> %x0, <64 x i8 ; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm2 {%k1} ; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm3 {%k1} {z} ; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpaddb %zmm3, %zmm2, %zmm1 -; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpaddb %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: retq %res = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) %res1 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> zeroinitializer, i64 %x3) @@ -29,8 +29,8 @@ define <64 x i8>@test_int_x86_avx512_mask_pmultishift_qb_512(<64 x i8> %x0, <64 ; CHECK-NEXT: vpmultishiftqb %zmm1, %zmm0, %zmm2 {%k1} ; CHECK-NEXT: vpmultishiftqb %zmm1, %zmm0, %zmm3 {%k1} {z} ; CHECK-NEXT: vpmultishiftqb %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: vpaddb %zmm3, %zmm2, %zmm1 -; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpaddb %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: retq %res = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> zeroinitializer, i64 %x3) @@ -51,8 +51,8 @@ define <64 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_512(<64 x i8> %x0, <64 x ; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4 ; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm4 {%k1} {z} -; CHECK-NEXT: vpaddb %zmm1, %zmm3, %zmm0 -; CHECK-NEXT: vpaddb %zmm0, %zmm4, %zmm0 +; CHECK-NEXT: vpaddb %zmm1, %zmm4, %zmm0 +; CHECK-NEXT: vpaddb %zmm0, %zmm3, %zmm0 ; CHECK-NEXT: retq %res = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) %res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> zeroinitializer, <64 x i8> %x2, i64 %x3) @@ -73,8 +73,8 @@ define <64 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_512(<64 x i8> %x0, <64 x ; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4 ; CHECK-NEXT: vpermt2b %zmm2, %zmm0, %zmm4 {%k1} {z} -; CHECK-NEXT: vpaddb %zmm1, %zmm3, %zmm0 -; CHECK-NEXT: vpaddb %zmm0, %zmm4, %zmm0 +; CHECK-NEXT: vpaddb %zmm1, %zmm4, %zmm0 +; CHECK-NEXT: vpaddb %zmm0, %zmm3, %zmm0 ; CHECK-NEXT: retq %res = call <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) %res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8> %x0, <64 x i8> zeroinitializer, <64 x i8> %x2, i64 %x3) diff --git a/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll index e7e403b..d915f6b 100644 --- a/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; NOTE: Assertions have been autogenerated by update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512vl -mattr=+avx512vbmi --show-mc-encoding| FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl -mattr=+avx512vbmi --show-mc-encoding| FileCheck %s declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) define <16 x i8>@test_int_x86_avx512_mask_permvar_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { @@ -10,8 +10,8 @@ define <16 x i8>@test_int_x86_avx512_mask_permvar_qi_128(<16 x i8> %x0, <16 x i8 ; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x8d,0xd0] ; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0x8d,0xd8] ; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0x8d,0xc0] -; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xcb] -; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) %res1 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3) @@ -30,8 +30,8 @@ define <32 x i8>@test_int_x86_avx512_mask_permvar_qi_256(<32 x i8> %x0, <32 x i8 ; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x8d,0xd0] ; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xd8] ; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0x8d,0xc0] -; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xcb] -; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfc,0xc0] +; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfc,0xc0] +; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3) @@ -50,8 +50,8 @@ define <16 x i8>@test_int_x86_avx512_mask_pmultishift_qb_128(<16 x i8> %x0, <16 ; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x83,0xd1] ; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x83,0xd9] ; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x83,0xc1] -; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xcb] -; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3) @@ -70,8 +70,8 @@ define <32 x i8>@test_int_x86_avx512_mask_pmultishift_qb_256(<32 x i8> %x0, <32 ; CHECK-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x83,0xd1] ; CHECK-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x83,0xd9] ; CHECK-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x83,0xc1] -; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xcb] -; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfc,0xc0] +; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfc,0xc0] +; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3) @@ -92,8 +92,8 @@ define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x ; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x75,0xca] ; CHECK-NEXT: vpxord %xmm4, %xmm4, %xmm4 ## encoding: [0x62,0xf1,0x5d,0x08,0xef,0xe4] ; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x75,0xe2] -; CHECK-NEXT: vpaddb %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfc,0xc1] -; CHECK-NEXT: vpaddb %xmm0, %xmm4, %xmm0 ## encoding: [0x62,0xf1,0x5d,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm1, %xmm4, %xmm0 ## encoding: [0x62,0xf1,0x5d,0x08,0xfc,0xc1] +; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) %res1 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> zeroinitializer, <16 x i8> %x2, i16 %x3) @@ -114,8 +114,8 @@ define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x ; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x75,0xca] ; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4] ; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x75,0xe2] -; CHECK-NEXT: vpaddb %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfc,0xc1] -; CHECK-NEXT: vpaddb %ymm0, %ymm4, %ymm0 ## encoding: [0x62,0xf1,0x5d,0x28,0xfc,0xc0] +; CHECK-NEXT: vpaddb %ymm1, %ymm4, %ymm0 ## encoding: [0x62,0xf1,0x5d,0x28,0xfc,0xc1] +; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> zeroinitializer, <32 x i8> %x2, i32 %x3) @@ -136,8 +136,8 @@ define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128(<16 x i8> %x0, <16 x ; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xca] ; CHECK-NEXT: vpxord %xmm4, %xmm4, %xmm4 ## encoding: [0x62,0xf1,0x5d,0x08,0xef,0xe4] ; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7d,0xe2] -; CHECK-NEXT: vpaddb %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfc,0xc1] -; CHECK-NEXT: vpaddb %xmm0, %xmm4, %xmm0 ## encoding: [0x62,0xf1,0x5d,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm1, %xmm4, %xmm0 ## encoding: [0x62,0xf1,0x5d,0x08,0xfc,0xc1] +; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) %res1 = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> zeroinitializer, <16 x i8> %x2, i16 %x3) @@ -158,8 +158,8 @@ define <32 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_256(<32 x i8> %x0, <32 x ; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xca] ; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4] ; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7d,0xe2] -; CHECK-NEXT: vpaddb %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfc,0xc1] -; CHECK-NEXT: vpaddb %ymm0, %ymm4, %ymm0 ## encoding: [0x62,0xf1,0x5d,0x28,0xfc,0xc0] +; CHECK-NEXT: vpaddb %ymm1, %ymm4, %ymm0 ## encoding: [0x62,0xf1,0x5d,0x28,0xfc,0xc1] +; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> zeroinitializer, <32 x i8> %x2, i32 %x3) -- 2.7.4