From c175d880a4694a66bc8be8a0e5ce333fbc14cf47 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 22 Oct 2022 21:33:37 +0100
Subject: [PATCH] [X86] Add freeze(pshufd/permilps(x,imm)) -> pshufd/permilps(freeze(x),imm) folding

Add X86 isGuaranteedNotToBeUndefOrPoisonForTargetNode / canCreateUndefOrPoisonForTargetNode overrides and add X86ISD::PSHUFD/VPERMILPI handling.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 40 +++++++++++++++++++++++++++++++++
 llvm/lib/Target/X86/X86ISelLowering.h   |  8 +++++++
 llvm/test/CodeGen/X86/freeze-vector.ll  |  4 ----
 3 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index aea2173..71e643e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42550,6 +42550,46 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
       Op, DemandedBits, DemandedElts, DAG, Depth);
 }
 
+bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
+    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+    bool PoisonOnly, unsigned Depth) const {
+  unsigned EltsBits = Op.getScalarValueSizeInBits();
+  unsigned NumElts = DemandedElts.getBitWidth();
+
+  // TODO: Add more target shuffles.
+  switch (Op.getOpcode()) {
+  case X86ISD::PSHUFD:
+  case X86ISD::VPERMILPI: {
+    SmallVector<int, 8> Mask;
+    DecodePSHUFMask(NumElts, EltsBits, Op.getConstantOperandVal(1), Mask);
+
+    APInt DemandedSrcElts = APInt::getZero(NumElts);
+    for (unsigned I = 0; I != NumElts; ++I)
+      if (DemandedElts[I])
+        DemandedSrcElts.setBit(Mask[I]);
+
+    return DAG.isGuaranteedNotToBeUndefOrPoison(
+        Op.getOperand(0), DemandedSrcElts, PoisonOnly, Depth + 1);
+  }
+  }
+  return TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
+      Op, DemandedElts, DAG, PoisonOnly, Depth);
+}
+
+bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
+    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+    bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
+
+  // TODO: Add more target shuffles.
+  switch (Op.getOpcode()) {
+  case X86ISD::PSHUFD:
+  case X86ISD::VPERMILPI:
+    return false;
+  }
+  return TargetLowering::canCreateUndefOrPoisonForTargetNode(
+      Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
+}
+
 bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op,
                                                   const APInt &DemandedElts,
                                                   APInt &UndefElts,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 2d621b9..f843795 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1158,6 +1158,14 @@ namespace llvm {
         SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
         SelectionDAG &DAG, unsigned Depth) const override;
 
+    bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
+        SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+        bool PoisonOnly, unsigned Depth) const override;
+
+    bool canCreateUndefOrPoisonForTargetNode(
+        SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+        bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;
+
     bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
                                    APInt &UndefElts,
                                    unsigned Depth) const override;
diff --git
a/llvm/test/CodeGen/X86/freeze-vector.ll b/llvm/test/CodeGen/X86/freeze-vector.ll
index 6e51ed2..2ab56c0 100644
--- a/llvm/test/CodeGen/X86/freeze-vector.ll
+++ b/llvm/test/CodeGen/X86/freeze-vector.ll
@@ -16,8 +16,6 @@ define <4 x i32> @freeze_insert_subvector(<8 x i32> %a0) nounwind {
 define <4 x i32> @freeze_pshufd(<4 x i32> %a0) nounwind {
 ; CHECK-LABEL: freeze_pshufd:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
 ; CHECK-NEXT:    ret{{[l|q]}}
   %x = shufflevector <4 x i32> %a0, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   %y = freeze <4 x i32> %x
@@ -28,8 +26,6 @@ define <4 x i32> @freeze_pshufd(<4 x i32> %a0) nounwind {
 define <4 x float> @freeze_permilps(<4 x float> %a0) nounwind {
 ; CHECK-LABEL: freeze_permilps:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
 ; CHECK-NEXT:    ret{{[l|q]}}
   %x = shufflevector <4 x float> %a0, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   %y = freeze <4 x float> %x
-- 
2.7.4