From f43b5100150f21ab4cd9c4591b6a219f9cc3ba4c Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Thu, 20 Dec 2018 12:59:05 +0000 Subject: [PATCH] [SystemZ] Make better use of VLDEB We already have special code (DAG combine support for FP_ROUND) to recognize cases where we can use a vector version of VLEDB to perform two floating-point truncates in parallel, but equivalent support for VLDEB (vector floating-point extends) has been missing so far. This patch adds corresponding DAG combine support for FP_EXTEND. llvm-svn: 349746 --- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 51 ++++++++++++++++++++++++- llvm/lib/Target/SystemZ/SystemZISelLowering.h | 1 + llvm/test/CodeGen/SystemZ/vec-conv-02.ll | 17 +++++++-- 3 files changed, 65 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index d7951ca..cdec66b 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -527,6 +527,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); + setTargetDAGCombine(ISD::FP_EXTEND); setTargetDAGCombine(ISD::BSWAP); setTargetDAGCombine(ISD::SDIV); setTargetDAGCombine(ISD::UDIV); @@ -5485,7 +5486,7 @@ SDValue SystemZTargetLowering::combineFP_ROUND( // (fpround (extract_vector_elt X 0)) // (fpround (extract_vector_elt X 1)) -> // (extract_vector_elt (VROUND X) 0) - // (extract_vector_elt (VROUND X) 1) + // (extract_vector_elt (VROUND X) 2) // // This is a special case since the target doesn't really support v2f32s. 
SelectionDAG &DAG = DCI.DAG; @@ -5527,6 +5528,53 @@ SDValue SystemZTargetLowering::combineFP_ROUND( return SDValue(); } +SDValue SystemZTargetLowering::combineFP_EXTEND( + SDNode *N, DAGCombinerInfo &DCI) const { + // (fpextend (extract_vector_elt X 0)) + // (fpextend (extract_vector_elt X 2)) -> + // (extract_vector_elt (VEXTEND X) 0) + // (extract_vector_elt (VEXTEND X) 1) + // + // This is a special case since the target doesn't really support v2f32s. + SelectionDAG &DAG = DCI.DAG; + SDValue Op0 = N->getOperand(0); + if (N->getValueType(0) == MVT::f64 && + Op0.hasOneUse() && + Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Op0.getOperand(0).getValueType() == MVT::v4f32 && + Op0.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) { + SDValue Vec = Op0.getOperand(0); + for (auto *U : Vec->uses()) { + if (U != Op0.getNode() && + U->hasOneUse() && + U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + U->getOperand(0) == Vec && + U->getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) { + SDValue OtherExtend = SDValue(*U->use_begin(), 0); + if (OtherExtend.getOpcode() == ISD::FP_EXTEND && + OtherExtend.getOperand(0) == SDValue(U, 0) && + OtherExtend.getValueType() == MVT::f64) { + SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N), + MVT::v2f64, Vec); + DCI.AddToWorklist(VExtend.getNode()); + SDValue Extract1 = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64, + VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32)); + DCI.AddToWorklist(Extract1.getNode()); + DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1); + SDValue Extract0 = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64, + VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); + return Extract0; + } + } + } + } + return SDValue(); +} + SDValue SystemZTargetLowering::combineBSWAP( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -5745,6 +5793,7 @@ SDValue 
SystemZTargetLowering::PerformDAGCombine(SDNode *N, case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); + case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI); case ISD::BSWAP: return combineBSWAP(N, DCI); case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 172dbee..622da32 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -592,6 +592,7 @@ private: SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; diff --git a/llvm/test/CodeGen/SystemZ/vec-conv-02.ll b/llvm/test/CodeGen/SystemZ/vec-conv-02.ll index ab84389..d4c0f72 100644 --- a/llvm/test/CodeGen/SystemZ/vec-conv-02.ll +++ b/llvm/test/CodeGen/SystemZ/vec-conv-02.ll @@ -15,19 +15,30 @@ define void @f1(<2 x double> %val, <2 x float> *%ptr) { ; Test conversion of an f64 in a vector register to an f32. define float @f2(<2 x double> %vec) { ; CHECK-LABEL: f2: -; CHECK: wledb %f0, %v24 +; CHECK: wledb %f0, %v24, 0, 0 ; CHECK: br %r14 %scalar = extractelement <2 x double> %vec, i32 0 %ret = fptrunc double %scalar to float ret float %ret } -; Test conversion of an f32 in a vector register to an f64. -define double @f3(<4 x float> %vec) { +; Test cases where even elements of a v4f32 are converted to f64s. 
+define <2 x double> @f3(<4 x float> %vec) { ; CHECK-LABEL: f3: +; CHECK: vldeb %v24, {{%v[0-9]+}} +; CHECK: br %r14 + %shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32> <i32 0, i32 2> + %res = fpext <2 x float> %shuffle to <2 x double> + ret <2 x double> %res +} + +; Test conversion of an f32 in a vector register to an f64. +define double @f4(<4 x float> %vec) { +; CHECK-LABEL: f4: ; CHECK: wldeb %f0, %v24 ; CHECK: br %r14 %scalar = extractelement <4 x float> %vec, i32 0 %ret = fpext float %scalar to double ret double %ret } + -- 2.7.4