From d172842b5100926cc882617a1874afc09f5fbaaf Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 13 Jul 2022 17:35:38 +0100
Subject: [PATCH] [DAG] SimplifyDemandedVectorElts - adjust demanded elements
 for selection mask for known zero results

If an element is known zero from both selections then it shouldn't
matter what the selection mask element is.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  3 ++
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 31 ++++++++++++-------
 llvm/test/CodeGen/X86/vselect-constants.ll    | 15 +++------
 3 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f2c94a8707dc..2654c00929d8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11053,6 +11053,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
   if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
     return V;
 
+  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+    return SDValue(N, 0);
+
   return SDValue();
 }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5225d028c103..d31ee5e335f9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3052,15 +3052,15 @@ bool TargetLowering::SimplifyDemandedVectorElts(
     break;
   }
   case ISD::VSELECT: {
+    SDValue Sel = Op.getOperand(0);
+    SDValue LHS = Op.getOperand(1);
+    SDValue RHS = Op.getOperand(2);
+
     // Try to transform the select condition based on the current demanded
     // elements.
-    // TODO: If a condition element is undef, we can choose from one arm of the
-    // select (and if one arm is undef, then we can propagate that to the
-    // result).
-    // TODO - add support for constant vselect masks (see IR version of this).
-    APInt UnusedUndef, UnusedZero;
-    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
-                                   UnusedZero, TLO, Depth + 1))
+    APInt UndefSel, UndefZero;
+    if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO,
+                                   Depth + 1))
       return true;
 
     // See if we can simplify either vselect operand.
@@ -3068,15 +3068,24 @@ bool TargetLowering::SimplifyDemandedVectorElts(
     APInt DemandedRHS(DemandedElts);
     APInt UndefLHS, ZeroLHS;
     APInt UndefRHS, ZeroRHS;
-    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
-                                   ZeroLHS, TLO, Depth + 1))
+    if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
+                                   Depth + 1))
       return true;
-    if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
-                                   ZeroRHS, TLO, Depth + 1))
+    if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
+                                   Depth + 1))
       return true;
 
     KnownUndef = UndefLHS & UndefRHS;
     KnownZero = ZeroLHS & ZeroRHS;
+
+    // If we know that the selected element is always zero, we don't need the
+    // select value element.
+    APInt DemandedSel = DemandedElts & ~KnownZero;
+    if (DemandedSel != DemandedElts)
+      if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO,
+                                     Depth + 1))
+        return true;
+
     break;
   }
   case ISD::VECTOR_SHUFFLE: {
diff --git a/llvm/test/CodeGen/X86/vselect-constants.ll b/llvm/test/CodeGen/X86/vselect-constants.ll
index f95eccae188a..0630a40b8809 100644
--- a/llvm/test/CodeGen/X86/vselect-constants.ll
+++ b/llvm/test/CodeGen/X86/vselect-constants.ll
@@ -282,13 +282,9 @@ define i32 @wrong_min_signbits(<2 x i16> %x) {
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
 ; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [1,0,0,0]
-; SSE-NEXT:    pandn %xmm0, %xmm1
-; SSE-NEXT:    psllw $15, %xmm1
-; SSE-NEXT:    psraw $15, %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm2
-; SSE-NEXT:    pandn %xmm0, %xmm2
-; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE-NEXT:    por %xmm2, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT:    por %xmm0, %xmm1
 ; SSE-NEXT:    movd %xmm1, %eax
 ; SSE-NEXT:    retq
 ;
@@ -296,10 +292,7 @@ define i32 @wrong_min_signbits(<2 x i16> %x) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,0,0,0]
-; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpsllw $15, %xmm0, %xmm0
-; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [2,0,0,0]
 ; AVX-NEXT:    vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-- 
2.34.1
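
Note (not part of the patch): the demanded-elements adjustment above can be
illustrated outside of LLVM. Below is a minimal standalone C++ sketch of the
idea - it uses std::bitset in place of APInt and made-up per-lane known-zero
masks, and is not the SimplifyDemandedVectorElts API itself. Lanes that are
known zero in both vselect arms produce zero regardless of the mask, so those
lanes of the selection mask stop being demanded.

#include <bitset>
#include <cstdio>

int main() {
  constexpr unsigned NumElts = 8;
  // All result elements are demanded by the user of the vselect.
  std::bitset<NumElts> DemandedElts{0b11111111};
  // Hypothetical known-zero lanes reported for the two select arms.
  std::bitset<NumElts> ZeroLHS{0b00001111}; // LHS arm known zero in lanes 0-3
  std::bitset<NumElts> ZeroRHS{0b00111100}; // RHS arm known zero in lanes 2-5
  // Lanes that are zero whichever arm the mask picks.
  std::bitset<NumElts> KnownZero = ZeroLHS & ZeroRHS; // lanes 2-3
  // Those lanes of the selection mask are no longer demanded, so a further
  // simplification of the mask can ignore them.
  std::bitset<NumElts> DemandedSel = DemandedElts & ~KnownZero;
  std::printf("KnownZero   = %s\n", KnownZero.to_string().c_str());
  std::printf("DemandedSel = %s\n", DemandedSel.to_string().c_str());
}

Running this prints KnownZero = 00001100 and DemandedSel = 11110011, which
mirrors how DemandedSel is computed before re-simplifying Sel in the patch.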