From: Simon Pilgrim
Date: Sat, 6 Oct 2018 14:51:14 +0000 (+0000)
Subject: [X86] combinePMULDQ - add op back to worklist if SimplifyDemandedBits succeeds on...
X-Git-Tag: llvmorg-8.0.0-rc1~7102
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=62d199f4e53f39ee5e797ee6e4098907c25135f2;p=platform%2Fupstream%2Fllvm.git

[X86] combinePMULDQ - add op back to worklist if SimplifyDemandedBits succeeds on either operand

Prevents missing other simplifications that may occur deep in the operand
chain where CommitTargetLoweringOpt won't add the PMULDQ back to the
worklist itself

llvm-svn: 343922
---

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cb4eea7..e113dbc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40317,10 +40317,14 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
   APInt DemandedMask(APInt::getLowBitsSet(64, 32));
 
   // PMULQDQ/PMULUDQ only uses lower 32 bits from each vector element.
-  if (TLI.SimplifyDemandedBits(LHS, DemandedMask, DCI))
+  if (TLI.SimplifyDemandedBits(LHS, DemandedMask, DCI)) {
+    DCI.AddToWorklist(N);
     return SDValue(N, 0);
-  if (TLI.SimplifyDemandedBits(RHS, DemandedMask, DCI))
+  }
+  if (TLI.SimplifyDemandedBits(RHS, DemandedMask, DCI)) {
+    DCI.AddToWorklist(N);
     return SDValue(N, 0);
+  }
 
   return SDValue();
 }

diff --git a/llvm/test/CodeGen/X86/combine-pmuldq.ll b/llvm/test/CodeGen/X86/combine-pmuldq.ll
index 3bdd5a3..c735b20 100644
--- a/llvm/test/CodeGen/X86/combine-pmuldq.ll
+++ b/llvm/test/CodeGen/X86/combine-pmuldq.ll
@@ -47,26 +47,10 @@ define <2 x i64> @combine_shuffle_zero_pmuludq(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-NEXT:    pmuludq %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX2-LABEL: combine_shuffle_zero_pmuludq:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    retq
-;
-; AVX512VL-LABEL: combine_shuffle_zero_pmuludq:
-; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; AVX512VL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT:    retq
-;
-; AVX512DQVL-LABEL: combine_shuffle_zero_pmuludq:
-; AVX512DQVL:       # %bb.0:
-; AVX512DQVL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512DQVL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; AVX512DQVL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    retq
+; AVX-LABEL: combine_shuffle_zero_pmuludq:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32>
   %2 = shufflevector <4 x i32> %a1, <4 x i32> zeroinitializer, <4 x i32>
   %3 = bitcast <4 x i32> %1 to <2 x i64>
@@ -84,22 +68,16 @@ define <4 x i64> @combine_shuffle_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1)
 ;
 ; AVX2-LABEL: combine_shuffle_zero_pmuludq_256:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
 ; AVX2-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512VL-LABEL: combine_shuffle_zero_pmuludq_256:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
 ; AVX512VL-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512DQVL-LABEL: combine_shuffle_zero_pmuludq_256:
 ; AVX512DQVL:       # %bb.0:
-; AVX512DQVL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512DQVL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
 ; AVX512DQVL-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX512DQVL-NEXT:    retq
   %1 = shufflevector <8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32>
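
For context, a minimal sketch of how the patched hunk reads once applied. Only the lines shown in the
hunk above come from the commit; the DCI parameter in the signature and the definitions of LHS, RHS
and TLI are not visible in the diff context and are assumptions here (the usual operand and
TargetLowering lookups), so treat this as an illustration of the change, not the exact surrounding code.

// Sketch only: combinePMULDQ after this patch. The LHS/RHS/TLI setup and the
// DCI parameter are assumed, not copied from the diff context.
static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI) {
  SDValue LHS = N->getOperand(0);                           // assumption
  SDValue RHS = N->getOperand(1);                           // assumption
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();  // assumption

  // PMULDQ/PMULUDQ only read the low 32 bits of each 64-bit element, so the
  // upper bits of each operand element are not demanded.
  APInt DemandedMask(APInt::getLowBitsSet(64, 32));

  // If an operand is simplified, CommitTargetLoweringOpt re-queues the nodes
  // it rewrote but not this PMULDQ/PMULUDQ user, so add the node back to the
  // worklist explicitly so later combines see the simplified operand chain.
  if (TLI.SimplifyDemandedBits(LHS, DemandedMask, DCI)) {
    DCI.AddToWorklist(N);
    return SDValue(N, 0);
  }
  if (TLI.SimplifyDemandedBits(RHS, DemandedMask, DCI)) {
    DCI.AddToWorklist(N);
    return SDValue(N, 0);
  }

  return SDValue();
}

The test diff above shows the effect: once the zero-blend of the high half of each element is
recognised as not demanded by PMULUDQ, the vpxor/vpblendd pair folds away and only the vpmuludq
remains in the checked output.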