From 06de4264260d24761e3d1e6ab815db0370b1d314 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 3 Mar 2020 23:45:59 -0800 Subject: [PATCH] [X86] Directly form VBROADCAST_LOAD in lowerShuffleAsBroadcast on AVX targets. If we would emit a VBROADCAST node, we can instead directly emit a VBROADCAST_LOAD. This allows us to get rid of the special case to use an f64 load on 32-bit targets for vXi64. I believe there is more cleanup we can do later in this function, but I'll do that in follow-ups. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 606df57..d0696c2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -12980,14 +12980,6 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1, // is expected to be a win for code size, register pressure, and possibly // uops even if the original vector load is not eliminated. - // 32-bit targets need to load i64 as a f64 and then bitcast the result. - if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64) { - BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements()); - Opcode = (BroadcastVT.is128BitVector() && !Subtarget.hasAVX2()) - ? X86ISD::MOVDDUP - : Opcode; - } - // Reduce the vector load and shuffle to a broadcasted scalar load. LoadSDNode *Ld = cast<LoadSDNode>(V); SDValue BaseAddr = Ld->getOperand(1); @@ -12995,6 +12987,21 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1, unsigned Offset = BroadcastIdx * SVT.getStoreSize(); assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset"); SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); + + // Directly form VBROADCAST_LOAD if we're using VBROADCAST opcode rather + // than MOVDDUP. + // FIXME: Should we add VBROADCAST_LOAD isel patterns for pre-AVX? 
+ if (Opcode == X86ISD::VBROADCAST) { + SDVTList Tys = DAG.getVTList(BroadcastVT, MVT::Other); + SDValue Ops[] = {Ld->getChain(), NewAddr}; + V = DAG.getMemIntrinsicNode( + X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SVT, + DAG.getMachineFunction().getMachineMemOperand( + Ld->getMemOperand(), Offset, SVT.getStoreSize())); + DAG.makeEquivalentMemoryOrdering(Ld, V); + return DAG.getBitcast(VT, V); + } + assert(SVT == MVT::f64 && "Unexpected VT!"); V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, DAG.getMachineFunction().getMachineMemOperand( Ld->getMemOperand(), Offset, SVT.getStoreSize())); -- 2.7.4