From bf61525e8c99de876c8d9c0a295d2e9319a39a42 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 12 Jan 2019 02:22:10 +0000 Subject: [PATCH] [X86] When lowering v1i1/v2i1/v4i1/v8i1 load/store with avx512f, but not avx512dq, use v16i1 as the intermediate mask type instead of v8i1. We still use i8 for the load/store type. So we need to convert to/from i16 around the mask type. By doing this we get an i8->i16 extload which we can then pattern match to a KMOVW if the access is aligned. llvm-svn: 350989 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 16 +-- llvm/lib/Target/X86/X86InstrAVX512.td | 2 + .../X86/avx512-extract-subvector-load-store.ll | 108 +++++++-------------- llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll | 6 +- llvm/test/CodeGen/X86/avx512-mask-op.ll | 18 ++-- llvm/test/CodeGen/X86/avx512-select.ll | 24 ++--- llvm/test/CodeGen/X86/vector-sext-widen.ll | 96 ++++++------------ llvm/test/CodeGen/X86/vector-sext.ll | 96 ++++++------------ 8 files changed, 119 insertions(+), 247 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index bd6a527..5766773 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20430,10 +20430,11 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget, assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() && "Expected AVX512F without AVX512DQI"); - StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1, - DAG.getUNDEF(MVT::v8i1), StoredVal, + StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1, + DAG.getUNDEF(MVT::v16i1), StoredVal, DAG.getIntPtrConstant(0, dl)); - StoredVal = DAG.getBitcast(MVT::i8, StoredVal); + StoredVal = DAG.getBitcast(MVT::i16, StoredVal); + StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal); return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), St->getPointerInfo(), St->getAlignment(), @@ -20499,10 +20500,11 @@ static SDValue 
LowerLoad(SDValue Op, const X86Subtarget &Subtarget, // Replace chain users with the new chain. assert(NewLd->getNumValues() == 2 && "Loads must carry a chain!"); - SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT, - DAG.getBitcast(MVT::v8i1, NewLd), - DAG.getIntPtrConstant(0, dl)); - return DAG.getMergeValues({Extract, NewLd.getValue(1)}, dl); + SDValue Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, NewLd); + Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT, + DAG.getBitcast(MVT::v16i1, Val), + DAG.getIntPtrConstant(0, dl)); + return DAG.getMergeValues({Val, NewLd.getValue(1)}, dl); } // Nothing useful we can do without SSE2 shuffles. diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 46cddee..fe0eb28 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2887,6 +2887,8 @@ let Predicates = [HasDQI] in { let Predicates = [HasAVX512] in { def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>; + def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))), + (KMOVWkm addr:$src)>; } def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT", diff --git a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll index e38ee56..df80e08 100644 --- a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll +++ b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll @@ -15,8 +15,7 @@ define void @load_v8i1_broadcast_4_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x doub ; ; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v2i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 4(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 4(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2 @@ -43,8 +42,7 @@ define void @load_v8i1_broadcast_7_v2i1(<8 x 
i1>* %a0,<2 x double> %a1,<2 x doub ; ; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v2i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 6(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 6(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] @@ -71,8 +69,7 @@ define void @load_v16i1_broadcast_8_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x do ; ; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v2i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 8(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 8(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2 @@ -99,8 +96,7 @@ define void @load_v16i1_broadcast_8_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x flo ; ; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v4i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 8(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 8(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %xmm2 @@ -127,8 +123,7 @@ define void @load_v16i1_broadcast_15_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x d ; ; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v2i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 14(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 14(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] @@ -155,8 +150,7 @@ define void @load_v16i1_broadcast_15_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x fl ; ; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v4i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 12(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; 
AVX512NOTDQ-NEXT: kmovw 12(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3] @@ -183,8 +177,7 @@ define void @load_v32i1_broadcast_16_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x d ; ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v2i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 16(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2 @@ -211,8 +204,7 @@ define void @load_v32i1_broadcast_16_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x fl ; ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v4i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 16(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %xmm2 @@ -240,8 +232,7 @@ define void @load_v32i1_broadcast_16_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x fl ; ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v8i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 16(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 ; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %ymm2 @@ -269,8 +260,7 @@ define void @load_v32i1_broadcast_31_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x d ; ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v2i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 30(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 30(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] 
@@ -297,8 +287,7 @@ define void @load_v32i1_broadcast_31_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x fl ; ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v4i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 28(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 28(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3] @@ -327,8 +316,7 @@ define void @load_v32i1_broadcast_31_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x fl ; ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v8i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 24(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 24(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 ; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7] @@ -357,8 +345,7 @@ define void @load_v64i1_broadcast_32_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x d ; ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v2i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 32(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2 @@ -385,8 +372,7 @@ define void @load_v64i1_broadcast_32_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x fl ; ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v4i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 32(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %xmm2 @@ -414,8 +400,7 @@ define void @load_v64i1_broadcast_32_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x fl ; ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v8i1: ; AVX512NOTDQ: # %bb.0: 
-; AVX512NOTDQ-NEXT: movzbl 32(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 ; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %ymm2 @@ -471,8 +456,7 @@ define void @load_v64i1_broadcast_63_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x d ; ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v2i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 62(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 62(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] @@ -499,8 +483,7 @@ define void @load_v64i1_broadcast_63_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x fl ; ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v4i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 60(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 60(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3] @@ -529,8 +512,7 @@ define void @load_v64i1_broadcast_63_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x fl ; ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v8i1: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 56(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 56(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 ; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7] @@ -690,8 +672,7 @@ define void @load_v8i1_broadcast_4_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v2i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 4(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 4(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, 
%xmm0 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0 @@ -733,8 +714,7 @@ define void @load_v8i1_broadcast_7_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v2i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 6(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 6(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] @@ -776,8 +756,7 @@ define void @load_v16i1_broadcast_8_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v2i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 8(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 8(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0 @@ -802,8 +781,7 @@ define void @load_v16i1_broadcast_8_v4i1_store(<16 x i1>* %a0,<4 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v4i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 8(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 8(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0 @@ -845,8 +823,7 @@ define void @load_v16i1_broadcast_15_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v2i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 14(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 14(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] @@ -871,8 +848,7 @@ define void @load_v16i1_broadcast_15_v4i1_store(<16 x 
i1>* %a0,<4 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v4i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 12(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 12(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] @@ -914,8 +890,7 @@ define void @load_v32i1_broadcast_16_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v2i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 16(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0 @@ -940,8 +915,7 @@ define void @load_v32i1_broadcast_16_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v4i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 16(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0 @@ -967,8 +941,7 @@ define void @load_v32i1_broadcast_16_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v8i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 16(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 ; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %ymm0 @@ -1011,8 +984,7 @@ define void @load_v32i1_broadcast_31_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v2i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 30(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd 
%eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 30(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] @@ -1037,8 +1009,7 @@ define void @load_v32i1_broadcast_31_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v4i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 28(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 28(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] @@ -1065,8 +1036,7 @@ define void @load_v32i1_broadcast_31_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v8i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 24(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 24(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 ; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7] @@ -1110,8 +1080,7 @@ define void @load_v64i1_broadcast_32_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v2i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 32(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0 @@ -1136,8 +1105,7 @@ define void @load_v64i1_broadcast_32_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v4i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 32(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, 
%xmm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0 @@ -1163,8 +1131,7 @@ define void @load_v64i1_broadcast_32_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v8i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 32(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 ; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %ymm0 @@ -1232,8 +1199,7 @@ define void @load_v64i1_broadcast_63_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v2i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 62(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 62(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] @@ -1258,8 +1224,7 @@ define void @load_v64i1_broadcast_63_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v4i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 60(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 60(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] @@ -1286,8 +1251,7 @@ define void @load_v64i1_broadcast_63_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v8i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl 56(%rdi), %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k1 +; AVX512NOTDQ-NEXT: kmovw 56(%rdi), %k1 ; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 ; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7] diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll 
b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll index 7612f90..bfa7a58 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -7,10 +7,8 @@ declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone define i16 @unpckbw_test(i16 %a0, i16 %a1) { ; X86-LABEL: unpckbw_test: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovw %eax, %k0 ## encoding: [0xc5,0xf8,0x92,0xc0] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08] -; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 ## encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] ; X86-NEXT: kunpckbw %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x4b,0xc1] ; X86-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; X86-NEXT: ## kill: def $ax killed $ax killed $eax diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index 500672c..1110e02 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -2197,8 +2197,7 @@ End: define <8 x i64> @load_8i1(<8 x i1>* %a) { ; KNL-LABEL: load_8i1: ; KNL: ## %bb.0: -; KNL-NEXT: movzbl (%rdi), %eax -; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw (%rdi), %k1 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: retq ; @@ -2210,8 +2209,7 @@ define <8 x i64> @load_8i1(<8 x i1>* %a) { ; ; AVX512BW-LABEL: load_8i1: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: movzbl (%rdi), %eax -; AVX512BW-NEXT: kmovd %eax, %k1 +; AVX512BW-NEXT: kmovw (%rdi), %k1 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; @@ -2271,8 +2269,7 @@ define <16 x i32> @load_16i1(<16 x i1>* %a) { define <2 x i16> @load_2i1(<2 x i1>* %a) { ; KNL-LABEL: load_2i1: ; KNL: ## %bb.0: -; KNL-NEXT: 
movzbl (%rdi), %eax -; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw (%rdi), %k1 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; KNL-NEXT: vzeroupper @@ -2286,8 +2283,7 @@ define <2 x i16> @load_2i1(<2 x i1>* %a) { ; ; AVX512BW-LABEL: load_2i1: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: movzbl (%rdi), %eax -; AVX512BW-NEXT: kmovd %eax, %k1 +; AVX512BW-NEXT: kmovw (%rdi), %k1 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper @@ -2315,8 +2311,7 @@ define <2 x i16> @load_2i1(<2 x i1>* %a) { define <4 x i16> @load_4i1(<4 x i1>* %a) { ; KNL-LABEL: load_4i1: ; KNL: ## %bb.0: -; KNL-NEXT: movzbl (%rdi), %eax -; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kmovw (%rdi), %k1 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; KNL-NEXT: vzeroupper @@ -2330,8 +2325,7 @@ define <4 x i16> @load_4i1(<4 x i1>* %a) { ; ; AVX512BW-LABEL: load_4i1: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: movzbl (%rdi), %eax -; AVX512BW-NEXT: kmovd %eax, %k1 +; AVX512BW-NEXT: kmovw (%rdi), %k1 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll index 1a5f78d..a34c64a 100644 --- a/llvm/test/CodeGen/X86/avx512-select.ll +++ b/llvm/test/CodeGen/X86/avx512-select.ll @@ -151,10 +151,8 @@ define i8 @select05_mem(<8 x i1>* %a.0, <8 x i1>* %m) { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl (%ecx), %ecx -; X86-NEXT: kmovw %ecx, %k0 -; X86-NEXT: movzbl (%eax), %eax -; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: kmovw (%ecx), %k0 +; X86-NEXT: kmovw (%eax), %k1 ; X86-NEXT: korw %k1, %k0, %k0 ; X86-NEXT: kmovw %k0, %eax ; X86-NEXT: 
# kill: def $al killed $al killed $eax @@ -162,10 +160,8 @@ define i8 @select05_mem(<8 x i1>* %a.0, <8 x i1>* %m) { ; ; X64-LABEL: select05_mem: ; X64: # %bb.0: -; X64-NEXT: movzbl (%rsi), %eax -; X64-NEXT: kmovw %eax, %k0 -; X64-NEXT: movzbl (%rdi), %eax -; X64-NEXT: kmovw %eax, %k1 +; X64-NEXT: kmovw (%rsi), %k0 +; X64-NEXT: kmovw (%rdi), %k1 ; X64-NEXT: korw %k1, %k0, %k0 ; X64-NEXT: kmovw %k0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -202,10 +198,8 @@ define i8 @select06_mem(<8 x i1>* %a.0, <8 x i1>* %m) { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl (%ecx), %ecx -; X86-NEXT: kmovw %ecx, %k0 -; X86-NEXT: movzbl (%eax), %eax -; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: kmovw (%ecx), %k0 +; X86-NEXT: kmovw (%eax), %k1 ; X86-NEXT: kandw %k1, %k0, %k0 ; X86-NEXT: kmovw %k0, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -213,10 +207,8 @@ define i8 @select06_mem(<8 x i1>* %a.0, <8 x i1>* %m) { ; ; X64-LABEL: select06_mem: ; X64: # %bb.0: -; X64-NEXT: movzbl (%rsi), %eax -; X64-NEXT: kmovw %eax, %k0 -; X64-NEXT: movzbl (%rdi), %eax -; X64-NEXT: kmovw %eax, %k1 +; X64-NEXT: kmovw (%rsi), %k0 +; X64-NEXT: kmovw (%rdi), %k1 ; X64-NEXT: kandw %k1, %k0, %k0 ; X64-NEXT: kmovw %k0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/vector-sext-widen.ll b/llvm/test/CodeGen/X86/vector-sext-widen.ll index 43bcb7d..5cd814d 100644 --- a/llvm/test/CodeGen/X86/vector-sext-widen.ll +++ b/llvm/test/CodeGen/X86/vector-sext-widen.ll @@ -1388,23 +1388,13 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) { ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX2-NEXT: retq ; -; AVX512F-LABEL: load_sext_2i1_to_2i64: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: movzbl (%rdi), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 
-; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: load_sext_2i1_to_2i64: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: movzbl (%rdi), %eax -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq +; AVX512-LABEL: load_sext_2i1_to_2i64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: kmovw (%rdi), %k1 +; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq ; ; X32-SSE2-LABEL: load_sext_2i1_to_2i64: ; X32-SSE2: # %bb.0: # %entry @@ -1608,23 +1598,13 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) { ; AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512F-LABEL: load_sext_4i1_to_4i32: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: movzbl (%rdi), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: load_sext_4i1_to_4i32: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: movzbl (%rdi), %eax -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq +; AVX512-LABEL: load_sext_4i1_to_4i32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: kmovw (%rdi), %k1 +; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq ; ; X32-SSE2-LABEL: load_sext_4i1_to_4i32: ; X32-SSE2: # %bb.0: # %entry @@ -1846,21 +1826,12 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ; 
AVX2-NEXT: retq ; -; AVX512F-LABEL: load_sext_4i1_to_4i64: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: movzbl (%rdi), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: load_sext_4i1_to_4i64: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: movzbl (%rdi), %eax -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; AVX512BW-NEXT: retq +; AVX512-LABEL: load_sext_4i1_to_4i64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: kmovw (%rdi), %k1 +; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512-NEXT: retq ; ; X32-SSE2-LABEL: load_sext_4i1_to_4i64: ; X32-SSE2: # %bb.0: # %entry @@ -2256,8 +2227,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { ; ; AVX512F-LABEL: load_sext_8i1_to_8i16: ; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: movzbl (%rdi), %eax -; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: kmovw (%rdi), %k1 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 @@ -2266,8 +2236,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { ; ; AVX512BW-LABEL: load_sext_8i1_to_8i16: ; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: movzbl (%rdi), %eax -; AVX512BW-NEXT: kmovd %eax, %k0 +; AVX512BW-NEXT: kmovw (%rdi), %k0 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper @@ -2711,21 +2680,12 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) { ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512F-LABEL: load_sext_8i1_to_8i32: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: movzbl (%rdi), %eax -; 
AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: load_sext_8i1_to_8i32: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: movzbl (%rdi), %eax -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; AVX512BW-NEXT: retq +; AVX512-LABEL: load_sext_8i1_to_8i32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: kmovw (%rdi), %k1 +; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512-NEXT: retq ; ; X32-SSE2-LABEL: load_sext_8i1_to_8i32: ; X32-SSE2: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll index cb3a02c..a5784f3 100644 --- a/llvm/test/CodeGen/X86/vector-sext.ll +++ b/llvm/test/CodeGen/X86/vector-sext.ll @@ -1388,23 +1388,13 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) { ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX2-NEXT: retq ; -; AVX512F-LABEL: load_sext_2i1_to_2i64: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: movzbl (%rdi), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: load_sext_2i1_to_2i64: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: movzbl (%rdi), %eax -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq +; AVX512-LABEL: load_sext_2i1_to_2i64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: kmovw (%rdi), %k1 +; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512-NEXT: # kill: def 
$xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq ; ; X32-SSE2-LABEL: load_sext_2i1_to_2i64: ; X32-SSE2: # %bb.0: # %entry @@ -1608,23 +1598,13 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) { ; AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512F-LABEL: load_sext_4i1_to_4i32: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: movzbl (%rdi), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: load_sext_4i1_to_4i32: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: movzbl (%rdi), %eax -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq +; AVX512-LABEL: load_sext_4i1_to_4i32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: kmovw (%rdi), %k1 +; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq ; ; X32-SSE2-LABEL: load_sext_4i1_to_4i32: ; X32-SSE2: # %bb.0: # %entry @@ -1846,21 +1826,12 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512F-LABEL: load_sext_4i1_to_4i64: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: movzbl (%rdi), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: load_sext_4i1_to_4i64: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: movzbl (%rdi), %eax -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed 
$zmm0 -; AVX512BW-NEXT: retq +; AVX512-LABEL: load_sext_4i1_to_4i64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: kmovw (%rdi), %k1 +; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512-NEXT: retq ; ; X32-SSE2-LABEL: load_sext_4i1_to_4i64: ; X32-SSE2: # %bb.0: # %entry @@ -2256,8 +2227,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { ; ; AVX512F-LABEL: load_sext_8i1_to_8i16: ; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: movzbl (%rdi), %eax -; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: kmovw (%rdi), %k1 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 @@ -2266,8 +2236,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { ; ; AVX512BW-LABEL: load_sext_8i1_to_8i16: ; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: movzbl (%rdi), %eax -; AVX512BW-NEXT: kmovd %eax, %k0 +; AVX512BW-NEXT: kmovw (%rdi), %k0 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper @@ -2711,21 +2680,12 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) { ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512F-LABEL: load_sext_8i1_to_8i32: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: movzbl (%rdi), %eax -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: load_sext_8i1_to_8i32: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: movzbl (%rdi), %eax -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; AVX512BW-NEXT: retq +; AVX512-LABEL: load_sext_8i1_to_8i32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: kmovw (%rdi), %k1 +; 
AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512-NEXT: retq ; ; X32-SSE2-LABEL: load_sext_8i1_to_8i32: ; X32-SSE2: # %bb.0: # %entry -- 2.7.4