From a0372dec261d137973b29ed14376230c372b4290 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 14 Dec 2016 03:17:27 +0000 Subject: [PATCH] [X86][InstCombine] Teach SimplifyDemandedVectorElts to handle scalar min/max/cmp intrinsics more correctly. Now we only pass bit 0 of the DemandedElts to optimize operand 1 as we recurse since the upper bits are unused. Also calculate UndefElts correctly. Simplify InstCombineCalls for these instrinics to just call SimplifyDemandedVectorElts for the call instrution to reuse this support. llvm-svn: 289628 --- .../Transforms/InstCombine/InstCombineCalls.cpp | 23 +++++----------- .../InstCombine/InstCombineSimplifyDemanded.cpp | 31 +++++++++++++++++++--- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 62e2de7..4c41484 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1760,23 +1760,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } - case Intrinsic::x86_sse_min_ss: - case Intrinsic::x86_sse_max_ss: - case Intrinsic::x86_sse_cmp_ss: - case Intrinsic::x86_sse2_min_sd: - case Intrinsic::x86_sse2_max_sd: - case Intrinsic::x86_sse2_cmp_sd: { - // These intrinsics only demand the lowest element of the second input - // vector. - Value *Arg1 = II->getArgOperand(1); - unsigned VWidth = Arg1->getType()->getVectorNumElements(); - if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) { - II->setArgOperand(1, V); - return II; - } - break; - } - case Intrinsic::x86_fma_vfmadd_ss: case Intrinsic::x86_fma_vfmsub_ss: case Intrinsic::x86_fma_vfnmadd_ss: @@ -1837,6 +1820,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::x86_sse_cmp_ss: + case Intrinsic::x86_sse_min_ss: + case Intrinsic::x86_sse_max_ss: + case Intrinsic::x86_sse2_cmp_sd: + case Intrinsic::x86_sse2_min_sd: + case Intrinsic::x86_sse2_max_sd: case Intrinsic::x86_xop_vfrcz_ss: case Intrinsic::x86_xop_vfrcz_sd: { unsigned VWidth = II->getType()->getVectorNumElements(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 979163c..3038fd5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1290,14 +1290,39 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // checks). break; - // Binary scalar-as-vector operations that work column-wise. A dest element - // is a function of the corresponding input elements from the two inputs. + // Binary scalar-as-vector operations that work column-wise. The high + // elements come from operand 0. The low element is a function of both + // operands. case Intrinsic::x86_sse_min_ss: case Intrinsic::x86_sse_max_ss: case Intrinsic::x86_sse_cmp_ss: case Intrinsic::x86_sse2_min_sd: case Intrinsic::x86_sse2_max_sd: - case Intrinsic::x86_sse2_cmp_sd: + case Intrinsic::x86_sse2_cmp_sd: { + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, + UndefElts, Depth + 1); + if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } + + // If lowest element of a scalar op isn't used then use Arg0. + if (!DemandedElts[0]) + return II->getArgOperand(0); + + // Only lower element is used for operand 1. + DemandedElts = 1; + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts, + UndefElts2, Depth + 1); + if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } + + // Lower element is undefined if both lower elements are undefined. + // Consider things like undef&0. The result is known zero, not undef. + if (!UndefElts2[0]) + UndefElts.clearBit(0); + + break; + } + + // Binary scalar-as-vector operations that work column-wise. A dest element + // is a function of the corresponding input elements from the two inputs. case Intrinsic::x86_sse41_round_ss: case Intrinsic::x86_sse41_round_sd: TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, -- 2.7.4