From: Phoebe Wang Date: Mon, 29 Apr 2024 00:40:26 +0000 (+0800) Subject: [X86][EVEX512] Check hasEVEX512 for canExtendTo512DQ (#90390) X-Git-Tag: upstream/18.1.6~53 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=58648f334d62c00e22e2200746513126d4448425;p=platform%2Fupstream%2Fllvm.git [X86][EVEX512] Check hasEVEX512 for canExtendTo512DQ (#90390) Fixes #90356 (cherry picked from commit 35b89dda2b9734917824b1457f149192669b314c) --- diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index a458b5f9ec8f..4d55a084b730 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -244,7 +244,8 @@ public: // TODO: Currently we're always allowing widening on CPUs without VLX, // because for many cases we don't have a better option. bool canExtendTo512DQ() const { - return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512); + return hasAVX512() && hasEVEX512() && + (!hasVLX() || getPreferVectorWidth() >= 512); } bool canExtendTo512BW() const { return hasBWI() && canExtendTo512DQ(); diff --git a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll index 4988fc35b10e..fdc25f44b156 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,-evex512 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,EVEX256 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,-evex512 | FileCheck %s --check-prefixes=CHECK,EVEX512 ; 256-bit @@ -236,3 +236,34 @@ define <8 x i16> @vpmullw128_test(<8 x i16> %i, <8 x i16> %j) { ret <8 x i16> %x } +define i16 @PR90356(<16 x i1> %a) { 
+; EVEX256-LABEL: PR90356: +; EVEX256: # %bb.0: +; EVEX256-NEXT: vpsllw $7, %xmm0, %xmm0 +; EVEX256-NEXT: vpmovb2m %xmm0, %k1 +; EVEX256-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; EVEX256-NEXT: movb $63, %al +; EVEX256-NEXT: kmovd %eax, %k1 +; EVEX256-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} +; EVEX256-NEXT: vptestmd %zmm0, %zmm0, %k0 +; EVEX256-NEXT: kmovd %k0, %eax +; EVEX256-NEXT: # kill: def $ax killed $ax killed $eax +; EVEX256-NEXT: vzeroupper +; EVEX256-NEXT: retq +; +; EVEX512-LABEL: PR90356: +; EVEX512: # %bb.0: +; EVEX512-NEXT: vpsllw $7, %xmm0, %xmm0 +; EVEX512-NEXT: vpmovb2m %xmm0, %k0 +; EVEX512-NEXT: vpmovm2w %k0, %ymm0 +; EVEX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; EVEX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] +; EVEX512-NEXT: vpmovw2m %ymm0, %k0 +; EVEX512-NEXT: kmovd %k0, %eax +; EVEX512-NEXT: # kill: def $ax killed $ax killed $eax +; EVEX512-NEXT: vzeroupper +; EVEX512-NEXT: retq + %1 = shufflevector <16 x i1> %a, <16 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31> + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +}