From: Simon Pilgrim
Date: Wed, 31 May 2023 10:41:20 +0000 (+0100)
Subject: [X86] getTargetConstantBitsFromNode - ensure we truncate broadcasts from wider constants.
X-Git-Tag: upstream/17.0.6~6654
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=65dce67f16fee20e8a43514408e0016540c539d5;p=platform%2Fupstream%2Fllvm.git

[X86] getTargetConstantBitsFromNode - ensure we truncate broadcasts from wider constants.

We might be broadcasting the lower portion of a wider constant pool entry, so ensure we truncate the full-width constant back down to the broadcast width.

(This also exposes an issue with how asm comments print these constants - I'll address that in a followup).

Fixes #63030
---

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1126870..9a201b7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7542,6 +7542,8 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
       if (CollectConstantBits(C, SrcEltBits[0], UndefSrcElts, 0)) {
         if (UndefSrcElts[0])
           UndefSrcElts.setBits(0, NumSrcElts);
+        if (SrcEltBits[0].getBitWidth() != SrcEltSizeInBits)
+          SrcEltBits[0] = SrcEltBits[0].trunc(SrcEltSizeInBits);
         SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
         return CastBitData(UndefSrcElts, SrcEltBits);
       }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index a6dbbfb..91f550b 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X86,AVX2
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86,AVX512
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,AVX512
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X86,AVX2,X86-AVX2
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86,AVX512,X86-AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64,AVX2,X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,AVX512,X64-AVX512
 
 declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
 declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
@@ -869,6 +869,62 @@ define <32 x i8> @PR52122(<32 x i8> %0, <32 x i8> %1) {
   ret <32 x i8> %5
 }
 
+define void @PR63030(ptr %p0) {
+; X86-AVX2-LABEL: PR63030:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX2-NEXT:    vmovaps (%eax), %xmm0
+; X86-AVX2-NEXT:    vmovddup {{.*#+}} xmm1 = [3,0,2,0,3,0,2,0]
+; X86-AVX2-NEXT:    # xmm1 = mem[0,0]
+; X86-AVX2-NEXT:    vpermpd {{.*#+}} ymm2 = ymm0[1,1,0,0]
+; X86-AVX2-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2,3],ymm2[4,5,6,7]
+; X86-AVX2-NEXT:    vmovaps {{.*#+}} xmm2 = [3,0,2,0]
+; X86-AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,1]
+; X86-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3],ymm0[4,5,6,7]
+; X86-AVX2-NEXT:    vmovaps %ymm0, (%eax)
+; X86-AVX2-NEXT:    vmovaps %ymm1, (%eax)
+; X86-AVX2-NEXT:    vzeroupper
+; X86-AVX2-NEXT:    retl
+;
+; X86-AVX512-LABEL: PR63030:
+; X86-AVX512:       # %bb.0:
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT:    vmovdqa (%eax), %xmm0
+; X86-AVX512-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,8,0,0,0,0,0,0,0,9,0,1,0,1,0]
+; X86-AVX512-NEXT:    vpermi2q {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm1
+; X86-AVX512-NEXT:    vmovdqa64 %zmm1, (%eax)
+; X86-AVX512-NEXT:    vzeroupper
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX2-LABEL: PR63030:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vmovaps (%rdi), %xmm0
+; X64-AVX2-NEXT:    vmovddup {{.*#+}} xmm1 = [3,2,3,2]
+; X64-AVX2-NEXT:    # xmm1 = mem[0,0]
+; X64-AVX2-NEXT:    vpermpd {{.*#+}} ymm2 = ymm0[1,1,0,0]
+; X64-AVX2-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2,3],ymm2[4,5,6,7]
+; X64-AVX2-NEXT:    vmovaps {{.*#+}} xmm2 = [3,2]
+; X64-AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,1]
+; X64-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3],ymm0[4,5,6,7]
+; X64-AVX2-NEXT:    vmovaps %ymm0, (%rax)
+; X64-AVX2-NEXT:    vmovaps %ymm1, (%rax)
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: PR63030:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    vmovdqa (%rdi), %xmm0
+; X64-AVX512-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,8,0,0,0,9,1,1]
+; X64-AVX512-NEXT:    vpermi2q {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
+; X64-AVX512-NEXT:    vmovdqa64 %zmm1, (%rax)
+; X64-AVX512-NEXT:    vzeroupper
+; X64-AVX512-NEXT:    retq
+  %load = load <2 x i64>, ptr %p0, align 16
+  %shuffle = shufflevector <2 x i64> <i64 3, i64 2>, <2 x i64> %load, <8 x i32> <i32 3, i32 0, i32 2, i32 2, i32 2, i32 1, i32 3, i32 3>
+  store volatile <8 x i64> %shuffle, ptr poison, align 64
+  ret void
+}
+
 define void @packss_zext_v8i1() {
 ; X86-LABEL: packss_zext_v8i1:
 ; X86:       # %bb.0:
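
For readers unfamiliar with the APInt plumbing: the fix boils down to the guarded APInt::trunc call added above, which drops the upper bits of a constant-pool value that is wider than the element actually being broadcast. The following standalone sketch is not part of the patch; it only assumes the LLVM Support headers and library are available, and the 128-bit value and 64-bit broadcast width are made-up example numbers. It shows the same truncation in isolation:

// Minimal illustration of the APInt::trunc step the fix relies on.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  // Pretend constant pool entry: a 128-bit value whose high qword is 2 and
  // whose low qword is 3 (values chosen arbitrarily for the example).
  llvm::APInt WideConstant = llvm::APInt(128, 2).shl(64) | llvm::APInt(128, 3);

  // The broadcast only reads the low 64 bits, so the collected bits must be
  // truncated back to the broadcast element width before being splatted.
  unsigned BroadcastEltSizeInBits = 64;
  llvm::APInt EltBits = WideConstant;
  if (EltBits.getBitWidth() != BroadcastEltSizeInBits)
    EltBits = EltBits.trunc(BroadcastEltSizeInBits);

  llvm::outs() << "broadcast element = " << EltBits.getZExtValue() << "\n"; // prints 3
  return 0;
}

In getTargetConstantBitsFromNode the truncated SrcEltBits[0] is then replicated by SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]), so every splatted element carries only broadcast-width bits rather than the full constant-pool width.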