From 7cb5a51f386d9cbe1715e5006661f596edde4d54 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 1 May 2020 16:08:30 +0100
Subject: [PATCH] [DAG] SimplifyDemandedVectorElts - add INSERT_SUBVECTOR
 SimplifyMultipleUseDemandedBits handling

---
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 16 ++++++++++++++++
 llvm/test/CodeGen/X86/oddshuffles.ll             |  4 ++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b1bf1b7..9148ce3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2438,6 +2438,22 @@ bool TargetLowering::SimplifyDemandedVectorElts(
       return true;
     KnownUndef.insertBits(SubUndef, SubIdx);
     KnownZero.insertBits(SubZero, SubIdx);
+
+    // Attempt to avoid multi-use ops if we don't need anything from them.
+    if (!BaseElts.isAllOnesValue() || !SubElts.isAllOnesValue()) {
+      APInt DemandedBits = APInt::getAllOnesValue(VT.getScalarSizeInBits());
+      SDValue NewBase = SimplifyMultipleUseDemandedBits(
+          Base, DemandedBits, BaseElts, TLO.DAG, Depth + 1);
+      SDValue NewSub = SimplifyMultipleUseDemandedBits(
+          Sub, DemandedBits, SubElts, TLO.DAG, Depth + 1);
+      if (NewBase || NewSub) {
+        NewBase = NewBase ? NewBase : Base;
+        NewSub = NewSub ? NewSub : Sub;
+        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewBase,
+                                        NewSub, Op.getOperand(2));
+        return TLO.CombineTo(Op, NewOp);
+      }
+    }
     break;
   }
   case ISD::EXTRACT_SUBVECTOR: {
diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll
index 82dbbeb..6bdc4e3 100644
--- a/llvm/test/CodeGen/X86/oddshuffles.ll
+++ b/llvm/test/CodeGen/X86/oddshuffles.ll
@@ -556,7 +556,7 @@ define void @v12i32(<8 x i32> %a, <8 x i32> %b, <12 x i32>* %p) nounwind {
 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm3 = ymm0[0,u,u,1,5,u,u,6]
 ; AVX1-NEXT:    vblendps {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2,3,4,5],ymm2[6],ymm3[7]
 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm3 = xmm1[0,1,0,1]
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm3, %ymm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm3
 ; AVX1-NEXT:    vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,3],xmm3[3,3]
@@ -602,7 +602,7 @@ define void @v12i32(<8 x i32> %a, <8 x i32> %b, <12 x i32>* %p) nounwind {
 ; XOP-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
 ; XOP-NEXT:    vpermil2ps {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[u,1,5,u],ymm2[6],ymm0[6]
 ; XOP-NEXT:    vpermilps {{.*#+}} xmm3 = xmm1[0,1,0,1]
-; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm3, %ymm3
+; XOP-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm3
 ; XOP-NEXT:    vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
 ; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; XOP-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,3],xmm3[3,3]
-- 
2.7.4