From 843d43e62ae132b723a2ba9606299ba2159c8949 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <llvm-dev@redking.me.uk> Date: Tue, 21 Jun 2022 11:47:51 +0100 Subject: [PATCH] [X86] computeKnownBitsForTargetNode - add X86ISD::VBROADCAST_LOAD handling This requires us to override the isTargetCanonicalConstantNode callback introduced in D128144, so we can recognise the various cases where a VBROADCAST_LOAD constant is being reused at different vector widths to prevent infinite loops. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 22 ++++++++++++++++++++++ llvm/lib/Target/X86/X86ISelLowering.h | 13 +++++++++++++ ...ower-of-two-or-zero-when-comparing-with-zero.ll | 4 +--- llvm/test/CodeGen/X86/var-permute-256.ll | 5 ----- llvm/test/CodeGen/X86/vector-pack-128.ll | 4 +--- .../X86/vector_splat-const-shift-of-constmasked.ll | 2 -- 6 files changed, 37 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 289c285..2c192db 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36732,6 +36732,28 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.setAllZero(); break; } + case X86ISD::VBROADCAST_LOAD: { + APInt UndefElts; + SmallVector<APInt> EltBits; + if (getTargetConstantBitsFromNode(Op, BitWidth, UndefElts, EltBits, + /*AllowWholeUndefs*/ false, + /*AllowPartialUndefs*/ false)) { + Known.Zero.setAllBits(); + Known.One.setAllBits(); + for (unsigned I = 0; I != NumElts; ++I) { + if (!DemandedElts[I]) + continue; + if (UndefElts[I]) { + Known.resetAll(); + break; + } + KnownBits Known2 = KnownBits::makeConstant(EltBits[I]); + Known = KnownBits::commonBits(Known, Known2); + } + return; + } + break; + } } // Handle target shuffles. 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 7f2f2a4..af11088 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1160,6 +1160,19 @@ namespace llvm { APInt &UndefElts, unsigned Depth) const override; + bool isTargetCanonicalConstantNode(SDValue Op) const override { + // Peek through bitcasts/extracts/inserts to see if we have a broadcast + // vector from memory. + while (Op.getOpcode() == ISD::BITCAST || + Op.getOpcode() == ISD::EXTRACT_SUBVECTOR || + (Op.getOpcode() == ISD::INSERT_SUBVECTOR && + Op.getOperand(0).isUndef())) + Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0); + + return Op.getOpcode() == X86ISD::VBROADCAST_LOAD || + TargetLowering::isTargetCanonicalConstantNode(Op); + } + const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; SDValue unwrapAddress(SDValue N) const override; diff --git a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll index 85ed998..90889f9 100644 --- a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll +++ b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll @@ -222,9 +222,7 @@ define <4 x i1> @p6_vector_urem_by_const__nonsplat_undef0(<4 x i32> %x, <4 x i32 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531] ; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrld $1, %xmm0, %xmm1 -; AVX2-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [715827882,715827882,715827882,715827882] ; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/var-permute-256.ll 
b/llvm/test/CodeGen/X86/var-permute-256.ll index 5ffeb90..6af3126 100644 --- a/llvm/test/CodeGen/X86/var-permute-256.ll +++ b/llvm/test/CodeGen/X86/var-permute-256.ll @@ -1184,8 +1184,6 @@ define <4 x i64> @PR50356(<4 x i64> %0, <4 x i32> %1, <4 x i64> %2) unnamed_addr ; AVX2-NEXT: movq %rsp, %rbp ; AVX2-NEXT: andq $-32, %rsp ; AVX2-NEXT: subq $64, %rsp -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [7,7,7,7] -; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1 ; AVX2-NEXT: vmovd %xmm1, %eax ; AVX2-NEXT: vmovaps %ymm0, (%rsp) ; AVX2-NEXT: andl $3, %eax @@ -1208,8 +1206,6 @@ define <4 x i64> @PR50356(<4 x i64> %0, <4 x i32> %1, <4 x i64> %2) unnamed_addr ; AVX512-NEXT: andq $-32, %rsp ; AVX512-NEXT: subq $64, %rsp ; AVX512-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 -; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm3 = [7,7,7,7] -; AVX512-NEXT: vpand %xmm3, %xmm1, %xmm1 ; AVX512-NEXT: vmovd %xmm1, %eax ; AVX512-NEXT: vmovaps %ymm0, (%rsp) ; AVX512-NEXT: andl $3, %eax @@ -1233,7 +1229,6 @@ define <4 x i64> @PR50356(<4 x i64> %0, <4 x i32> %1, <4 x i64> %2) unnamed_addr ; AVX512VL-NEXT: movq %rsp, %rbp ; AVX512VL-NEXT: andq $-32, %rsp ; AVX512VL-NEXT: subq $64, %rsp -; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovd %xmm1, %eax ; AVX512VL-NEXT: vmovaps %ymm0, (%rsp) ; AVX512VL-NEXT: andl $3, %eax diff --git a/llvm/test/CodeGen/X86/vector-pack-128.ll b/llvm/test/CodeGen/X86/vector-pack-128.ll index 4f6c795..b88bb46 100644 --- a/llvm/test/CodeGen/X86/vector-pack-128.ll +++ b/llvm/test/CodeGen/X86/vector-pack-128.ll @@ -160,9 +160,7 @@ define <8 x i16> @concat_trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) nounwi ; AVX2-NEXT: vpsrad $17, %xmm0, %xmm0 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vpackusdw %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: 
retq ; ; AVX512-LABEL: concat_trunc_packssdw_128: diff --git a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll index 608522e..d24fc9a 100644 --- a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll +++ b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll @@ -3076,8 +3076,6 @@ define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_ashr_1(<2 x i64> %a ; ; X86-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1: ; X86-AVX2: # %bb.0: -; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294] -; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vpsrad $1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86-AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3] -- 2.7.4