From: Nemanja Ivanovic
Date: Tue, 29 Nov 2016 23:36:03 +0000 (+0000)
Subject: [PowerPC] Improvements for BUILD_VECTOR Vol. 2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8c11e79b1773ba484db0a710aa90ebffbfea550c;p=platform%2Fupstream%2Fllvm.git

[PowerPC] Improvements for BUILD_VECTOR Vol. 2

This patch corresponds to review:
https://reviews.llvm.org/D25980

This is the 2nd patch in a series of 4 that improve the lowering and combining
for BUILD_VECTOR nodes on PowerPC. This particular patch combines a build vector
of fp-to-int conversions into an fp-to-int conversion of a build vector of fp
values. For example:
  Converts (build_vector (fp_to_[su]i $A), (fp_to_[su]i $B), ...)
  Into (fp_to_[su]i (build_vector $A, $B, ...)).
Which is a natural match for much cleaner code.

llvm-svn: 288218
---

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 4797fe3..73572e9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10670,6 +10670,86 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                      ShiftCst);
 }
 
+/// \brief Reduces the number of fp-to-int conversions when building a vector.
+///
+/// Rewrites a vector built from scalar fp-to-int conversions as one fp-to-int
+/// conversion of a vector of the fp inputs. Returns an empty SDValue when the
+/// operands are not all the same PPC conversion or the vector is a splat.
+/// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
+/// becomes (fptosi (build_vector ($A, $B, ...)))
+SDValue PPCTargetLowering::
+combineElementTruncationToVectorTruncation(SDNode *N,
+                                           DAGCombinerInfo &DCI) const {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+         "Should be called with a BUILD_VECTOR node");
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+
+  SDValue FirstInput = N->getOperand(0);
+  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
+         "The input operand must be an fp-to-int conversion.");
+
+  // This combine happens after legalization so the fp_to_[su]i nodes are
+  // already converted to PPCISD nodes.
+  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
+  if (FirstConversion == PPCISD::FCTIDZ ||
+      FirstConversion == PPCISD::FCTIDUZ ||
+      FirstConversion == PPCISD::FCTIWZ ||
+      FirstConversion == PPCISD::FCTIWUZ) {
+    bool IsSplat = true;
+    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
+                   FirstConversion == PPCISD::FCTIWUZ;
+    EVT SrcVT = FirstInput.getOperand(0).getValueType();
+    SmallVector<SDValue, 4> Ops;
+    EVT TargetVT = N->getValueType(0);
+    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
+      if (N->getOperand(i).getOpcode() != PPCISD::MFVSR)
+        return SDValue();
+      unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode();
+      if (NextConversion != FirstConversion)
+        return SDValue();
+      if (N->getOperand(i) != FirstInput)
+        IsSplat = false;
+    }
+
+    // If this is a splat, we leave it as-is since there will be only a single
+    // fp-to-int conversion followed by a splat of the integer. This is better
+    // for 32-bit and smaller ints and neutral for 64-bit ints.
+    if (IsSplat)
+      return SDValue();
+
+    // Now that we know we have the right type of node, get its operands
+    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
+      SDValue In = N->getOperand(i).getOperand(0);
+      // For 32-bit values, we need to add an FP_ROUND node.
+      if (Is32Bit) {
+        if (In.isUndef())
+          Ops.push_back(DAG.getUNDEF(SrcVT));
+        else {
+          SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
+                                      MVT::f32, In.getOperand(0),
+                                      DAG.getIntPtrConstant(1, dl));
+          Ops.push_back(Trunc);
+        }
+      } else
+        Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
+    }
+
+    unsigned Opcode;
+    if (FirstConversion == PPCISD::FCTIDZ ||
+        FirstConversion == PPCISD::FCTIWZ)
+      Opcode = ISD::FP_TO_SINT;
+    else
+      Opcode = ISD::FP_TO_UINT;
+
+    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
+    SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
+    return DAG.getNode(Opcode, dl, TargetVT, BV);
+  }
+  return SDValue();
+}
+
 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@@ -10677,7 +10757,20 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
 
   SelectionDAG &DAG = DCI.DAG;
   SDLoc dl(N);
-  if (N->getValueType(0) != MVT::v2f64 || !Subtarget.hasVSX())
+
+  if (!Subtarget.hasVSX())
+    return SDValue();
+
+  // The target independent DAG combiner will leave a build_vector of
+  // float-to-int conversions intact. We can generate MUCH better code for
+  // a float-to-int conversion of a vector of floats.
+  SDValue FirstInput = N->getOperand(0);
+  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
+    SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
+    if (Reduced)
+      return Reduced;
+  }
+  if (N->getValueType(0) != MVT::v2f64)
     return SDValue();
 
   // Looking for:
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 1723d12..95f32f5 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -990,6 +990,10 @@ namespace llvm {
     unsigned combineRepeatedFPDivisors() const override;
 
     CCAssignFn *useFastISelCCs(unsigned Flag) const;
+
+    SDValue
+      combineElementTruncationToVectorTruncation(SDNode *N,
+                                                 DAGCombinerInfo &DCI) const;
   };
 
   namespace PPC {