From 150d9f3187252d6ca872456684ace1f958440ce1 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 22 Jan 2015 12:07:59 +0000 Subject: [PATCH] Fixed a bug in type legalizer for masked load/store intrinsics. The problem occurs when after vectorization we have type <2 x i32>. This type is promoted to <2 x i64> and then requires additional efforts for expanding loads and truncating stores. I added EXPAND / TRUNCATE attributes to the masked load/store SDNodes. The code now contains additional shuffles. I've prepared changes in the cost estimation for masked memory operations, it will be submitted separately. llvm-svn: 226808 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 6 +- llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 31 ++-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 +- .../CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 25 ++-- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 62 ++++++-- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 13 +- .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 6 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 164 +++++++++++++++++++++ llvm/test/CodeGen/X86/masked_memop.ll | 7 +- 10 files changed, 277 insertions(+), 54 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 4950797bb..ee4bc08 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -867,9 +867,11 @@ public: SDValue Offset, ISD::MemIndexedMode AM); SDValue getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, - SDValue Mask, SDValue Src0, MachineMemOperand *MMO); + SDValue Mask, SDValue Src0, EVT MemVT, + MachineMemOperand *MMO, ISD::LoadExtType); SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, - SDValue Ptr, SDValue Mask, MachineMemOperand *MMO); + SDValue Ptr, SDValue Mask, EVT MemVT, + MachineMemOperand *MMO, bool IsTrunc); /// getSrcValue - Construct a node to track a Value* through the backend. SDValue getSrcValue(const Value *v); diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 8e7fd54..933ec7f 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1970,13 +1970,17 @@ public: class MaskedLoadSDNode : public MaskedLoadStoreSDNode { public: friend class SelectionDAG; - MaskedLoadSDNode(unsigned Order, DebugLoc dl, - SDValue *Operands, unsigned numOperands, - SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) + MaskedLoadSDNode(unsigned Order, DebugLoc dl, SDValue *Operands, + unsigned numOperands, SDVTList VTs, ISD::LoadExtType ETy, + EVT MemVT, MachineMemOperand *MMO) : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, Operands, numOperands, - VTs, MemVT, MMO) - {} + VTs, MemVT, MMO) { + SubclassData |= (unsigned short)ETy; + } + ISD::LoadExtType getExtensionType() const { + return ISD::LoadExtType(SubclassData & 3); + } const SDValue &getSrc0() const { return getOperand(3); } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MLOAD; @@ -1989,14 +1993,19 @@ class MaskedStoreSDNode : public MaskedLoadStoreSDNode { public: friend class SelectionDAG; - MaskedStoreSDNode(unsigned Order, DebugLoc dl, - SDValue *Operands, unsigned numOperands, - SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) + MaskedStoreSDNode(unsigned Order, DebugLoc dl, SDValue *Operands, + unsigned numOperands, SDVTList VTs, bool isTrunc, EVT MemVT, + MachineMemOperand *MMO) : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, Operands, numOperands, - VTs, MemVT, MMO) - {} + VTs, MemVT, MMO) { + SubclassData |= (unsigned short)isTrunc; + } + /// isTruncatingStore - Return true if the op does a truncation before store. + /// For integers this is the same as doing a TRUNCATE and storing the result. + /// For floats, it is the same as doing an FP_ROUND and storing the result. + bool isTruncatingStore() const { return SubclassData & 1; } - const SDValue &getData() const { return getOperand(3); } + const SDValue &getValue() const { return getOperand(3); } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MSTORE; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6bd38ae..a06f35e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4853,7 +4853,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { MaskedStoreSDNode *MST = dyn_cast(N); SDValue Mask = MST->getMask(); - SDValue Data = MST->getData(); + SDValue Data = MST->getValue(); SDLoc DL(N); // If the MSTORE data type requires splitting and the mask is provided by a @@ -4896,7 +4896,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, MST->getAAInfo(), MST->getRanges()); - Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, MMO); + Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, + MST->isTruncatingStore()); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, @@ -4908,7 +4909,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { SecondHalfAlignment, MST->getAAInfo(), MST->getRanges()); - Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, MMO); + Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, + MST->isTruncatingStore()); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); @@ -4969,7 +4971,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, MMO); + Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, + ISD::NON_EXTLOAD); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, @@ -4980,7 +4983,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); - Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, MMO); + Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, + ISD::NON_EXTLOAD); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); @@ -9497,7 +9501,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; unsigned NewBW = NextPowerOf2(MSB - ShAmt); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); - // The narrowwing should be profitable, the load/store operation should be + // The narrowing should be profitable, the load/store operation should be // legal (or custom) and the store size should be equal to the NewVT width. while (NewBW < BitWidth && (NewVT.getStoreSizeInBits() != NewBW || diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 82b114b..a4e44cc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -458,16 +458,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0()); - SDValue ExtMask = PromoteTargetBoolean(N->getMask(), NVT); - SDLoc dl(N); - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(N->getPointerInfo(), - MachineMemOperand::MOLoad, NVT.getStoreSize(), - N->getAlignment(), N->getAAInfo(), N->getRanges()); + SDValue Mask = N->getMask(); + EVT NewMaskVT = getSetCCResultType(NVT); + if (NewMaskVT != N->getMask().getValueType()) + Mask = PromoteTargetBoolean(Mask, NewMaskVT); + SDLoc dl(N); SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), - ExtMask, ExtSrc0, MMO); + Mask, ExtSrc0, N->getMemoryVT(), + N->getMemOperand(), ISD::SEXTLOAD); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -1117,16 +1117,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){ assert(OpNo == 2 && "Only know how to promote the mask!"); - SDValue DataOp = N->getData(); + SDValue DataOp = N->getValue(); EVT DataVT = DataOp.getValueType(); SDValue Mask = N->getMask(); EVT MaskVT = Mask.getValueType(); SDLoc dl(N); + bool TruncateStore = false; if (!TLI.isTypeLegal(DataVT)) { if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) { DataOp = GetPromotedInteger(DataOp); Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); + TruncateStore = true; } else { assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector && @@ -1156,10 +1158,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN } else Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType()); - SmallVector NewOps(N->op_begin(), N->op_end()); - NewOps[2] = Mask; - NewOps[3] = DataOp; - return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); + return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, + N->getMemoryVT(), N->getMemOperand(), + TruncateStore); } SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){ diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 1cd9f40..cef3fc9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -659,6 +659,7 @@ private: SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); + SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_Convert(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 96b69ee..63671f7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -992,6 +992,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue Ptr = MLD->getBasePtr(); SDValue Mask = MLD->getMask(); unsigned Alignment = MLD->getOriginalAlignment(); + ISD::LoadExtType ExtType = MLD->getExtensionType(); // if Alignment is equal to the vector size, // take the half of it for the second part @@ -1015,7 +1016,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, MMO); + Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, + ExtType); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -1026,7 +1028,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); - Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, MMO); + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, + ExtType); // Build a factor node to remember that this load is independent of the @@ -1464,7 +1467,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); SDValue Mask = N->getMask(); - SDValue Data = N->getData(); + SDValue Data = N->getValue(); EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); SDLoc DL(N); @@ -1489,7 +1492,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, MMO); + Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, + N->isTruncatingStore()); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, @@ -1500,7 +1504,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(), N->getRanges()); - Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, MMO); + Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, + N->isTruncatingStore()); // Build a factor node to remember that this store is independent of the @@ -2412,6 +2417,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { SDValue Mask = N->getMask(); EVT MaskVT = Mask.getValueType(); SDValue Src0 = GetWidenedVector(N->getSrc0()); + ISD::LoadExtType ExtType = N->getExtensionType(); SDLoc dl(N); if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) @@ -2434,14 +2440,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); } - // Rebuild memory operand because MemoryVT was changed - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(N->getPointerInfo(), - MachineMemOperand::MOLoad, WidenVT.getStoreSize(), - N->getAlignment(), N->getAAInfo(), N->getRanges()); - SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), - Mask, Src0, MMO); + Mask, Src0, N->getMemoryVT(), + N->getMemOperand(), ExtType); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -2593,6 +2594,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; + case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; case ISD::ANY_EXTEND: @@ -2791,6 +2793,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); } +SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { + MaskedStoreSDNode *MST = cast(N); + SDValue Mask = MST->getMask(); + EVT MaskVT = Mask.getValueType(); + SDValue StVal = MST->getValue(); + // Widen the value + SDValue WideVal = GetWidenedVector(StVal); + SDLoc dl(N); + + if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) + Mask = GetWidenedVector(Mask); + else { + // The mask should be widened as well + EVT BoolVT = getSetCCResultType(WideVal.getValueType()); + // We can't use ModifyToType() because we should fill the mask with + // zeroes + unsigned WidenNumElts = BoolVT.getVectorNumElements(); + unsigned MaskNumElts = MaskVT.getVectorNumElements(); + + unsigned NumConcat = WidenNumElts / MaskNumElts; + SmallVector Ops(NumConcat); + SDValue ZeroVal = DAG.getConstant(0, MaskVT); + Ops[0] = Mask; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = ZeroVal; + + Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); + } + assert(Mask.getValueType().getVectorNumElements() == + WideVal.getValueType().getVectorNumElements() && + "Mask and data vectors should have the same number of elements"); + return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(), + Mask, MST->getMemoryVT(), MST->getMemOperand(), + false); +} + SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c819516..f75d5f4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4924,15 +4924,15 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, SDValue SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, - SDValue Ptr, SDValue Mask, SDValue Src0, - MachineMemOperand *MMO) { + SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT, + MachineMemOperand *MMO, ISD::LoadExtType ExtTy) { SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = { Chain, Ptr, Mask, Src0 }; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED, + ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); @@ -4944,14 +4944,15 @@ SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, } SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(), dl.getDebugLoc(), Ops, 4, VTs, - VT, MMO); + ExtTy, MemVT, MMO); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, - SDValue Ptr, SDValue Mask, MachineMemOperand *MMO) { + SDValue Ptr, SDValue Mask, EVT MemVT, + MachineMemOperand *MMO, bool isTrunc) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); EVT VT = Val.getValueType(); @@ -4970,7 +4971,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, } SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(), dl.getDebugLoc(), Ops, 4, - VTs, VT, MMO); + VTs, isTrunc, MemVT, MMO); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ea76bc6..4566792 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3697,7 +3697,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { getMachineMemOperand(MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, VT.getStoreSize(), Alignment, AAInfo); - SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, MMO); + SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, + MMO, false); DAG.setRoot(StoreNode); setValue(&I, StoreNode); } @@ -3736,7 +3737,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { MachineMemOperand::MOLoad, VT.getStoreSize(), Alignment, AAInfo, Ranges); - SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, MMO); + SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, + ISD::NON_EXTLOAD); SDValue OutChain = Load.getValue(1); DAG.setRoot(OutChain); setValue(&I, Load); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 21d43ec..4871bc0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1709,7 +1709,9 @@ void X86TargetLowering::resetOperationActions() { setTargetDAGCombine(ISD::FMA); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::LOAD); + setTargetDAGCombine(ISD::MLOAD); setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::MSTORE); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); @@ -24796,6 +24798,166 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// PerformMLOADCombine - Resolve extending loads +static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { + MaskedLoadSDNode *Mld = cast(N); + if (Mld->getExtensionType() != ISD::SEXTLOAD) + return SDValue(); + + EVT VT = Mld->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned NumElems = VT.getVectorNumElements(); + EVT LdVT = Mld->getMemoryVT(); + SDLoc dl(Mld); + + assert(LdVT != VT && "Cannot extend to the same type"); + unsigned ToSz = VT.getVectorElementType().getSizeInBits(); + unsigned FromSz = LdVT.getVectorElementType().getSizeInBits(); + // From, To sizes and ElemCount must be pow of two + assert (isPowerOf2_32(NumElems * FromSz * ToSz) && + "Unexpected size for extending masked load"); + + unsigned SizeRatio = ToSz / FromSz; + assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits()); + + // Create a type on which we perform the shuffle + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), + LdVT.getScalarType(), NumElems*SizeRatio); + assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); + + // Convert Src0 value + SDValue WideSrc0 = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mld->getSrc0()); + if (Mld->getSrc0().getOpcode() != ISD::UNDEF) { + SmallVector ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i != NumElems; ++i) + ShuffleVec[i] = i * SizeRatio; + + // Can't shuffle using an illegal type. + assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal"); + WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0, + DAG.getUNDEF(WideVecVT), &ShuffleVec[0]); + } + // Prepare the new mask + SDValue NewMask; + SDValue Mask = Mld->getMask(); + if (Mask.getValueType() == VT) { + // Mask and original value have the same type + NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask); + SmallVector ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i != NumElems; ++i) + ShuffleVec[i] = i * SizeRatio; + for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i) + ShuffleVec[i] = NumElems*SizeRatio; + NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, + DAG.getConstant(0, WideVecVT), + &ShuffleVec[0]); + } + else { + assert(Mask.getValueType().getVectorElementType() == MVT::i1); + unsigned WidenNumElts = NumElems*SizeRatio; + unsigned MaskNumElts = VT.getVectorNumElements(); + EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + WidenNumElts); + + unsigned NumConcat = WidenNumElts / MaskNumElts; + SmallVector Ops(NumConcat); + SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType()); + Ops[0] = Mask; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = ZeroVal; + + NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops); + } + + SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(), + Mld->getBasePtr(), NewMask, WideSrc0, + Mld->getMemoryVT(), Mld->getMemOperand(), + ISD::NON_EXTLOAD); + SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd); + return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true); + +} +/// PerformMSTORECombine - Resolve truncating stores +static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + MaskedStoreSDNode *Mst = cast(N); + if (!Mst->isTruncatingStore()) + return SDValue(); + + EVT VT = Mst->getValue().getValueType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned NumElems = VT.getVectorNumElements(); + EVT StVT = Mst->getMemoryVT(); + SDLoc dl(Mst); + + assert(StVT != VT && "Cannot truncate to the same type"); + unsigned FromSz = VT.getVectorElementType().getSizeInBits(); + unsigned ToSz = StVT.getVectorElementType().getSizeInBits(); + + // From, To sizes and ElemCount must be pow of two + assert (isPowerOf2_32(NumElems * FromSz * ToSz) && + "Unexpected size for truncating masked store"); + // We are going to use the original vector elt for storing. + // Accumulated smaller vector elements must be a multiple of the store size. + assert (((NumElems * FromSz) % ToSz) == 0 && + "Unexpected ratio for truncating masked store"); + + unsigned SizeRatio = FromSz / ToSz; + assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits()); + + // Create a type on which we perform the shuffle + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), + StVT.getScalarType(), NumElems*SizeRatio); + + assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); + + SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mst->getValue()); + SmallVector ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i != NumElems; ++i) + ShuffleVec[i] = i * SizeRatio; + + // Can't shuffle using an illegal type. + assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal"); + + SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec, + DAG.getUNDEF(WideVecVT), + &ShuffleVec[0]); + + SDValue NewMask; + SDValue Mask = Mst->getMask(); + if (Mask.getValueType() == VT) { + // Mask and original value have the same type + NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask); + for (unsigned i = 0; i != NumElems; ++i) + ShuffleVec[i] = i * SizeRatio; + for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i) + ShuffleVec[i] = NumElems*SizeRatio; + NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, + DAG.getConstant(0, WideVecVT), + &ShuffleVec[0]); + } + else { + assert(Mask.getValueType().getVectorElementType() == MVT::i1); + unsigned WidenNumElts = NumElems*SizeRatio; + unsigned MaskNumElts = VT.getVectorNumElements(); + EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + WidenNumElts); + + unsigned NumConcat = WidenNumElts / MaskNumElts; + SmallVector Ops(NumConcat); + SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType()); + Ops[0] = Mask; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = ZeroVal; + + NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops); + } + + return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, Mst->getBasePtr(), + NewMask, StVT, Mst->getMemOperand(), false); +} /// PerformSTORECombine - Do target-specific dag combines on STORE nodes. static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { @@ -25894,7 +26056,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget); case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget); case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget); + case ISD::MLOAD: return PerformMLOADCombine(N, DAG, DCI, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); + case ISD::MSTORE: return PerformMSTORECombine(N, DAG, Subtarget); case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this); case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget); case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget); diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll index 726d712..b5ff630 100644 --- a/llvm/test/CodeGen/X86/masked_memop.ll +++ b/llvm/test/CodeGen/X86/masked_memop.ll @@ -159,7 +159,7 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) { } ; AVX2-LABEL: test15 -; AVX2: vpmaskmovq +; AVX2: vpmaskmovd define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) { %mask = icmp eq <2 x i32> %trigger, zeroinitializer call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask) @@ -176,8 +176,9 @@ define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> % } ; AVX2-LABEL: test17 -; AVX2: vpmaskmovq -; AVX2: vblendvpd +; AVX2: vpmaskmovd +; AVX2: vblendvps +; AVX2: vpmovsxdq define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) { %mask = icmp eq <2 x i32> %trigger, zeroinitializer %res = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst) -- 2.7.4