From c6462aacd58021f4f9c38d2d196dcbe80d68302e Mon Sep 17 00:00:00 2001
From: Justin Holewinski
Date: Wed, 14 Nov 2012 19:19:16 +0000
Subject: [PATCH] [NVPTX] Implement custom lowering of loads/stores for i1

Loads from i1 become loads from i8 followed by trunc
Stores to i1 become zext to i8 followed by store to i8

Fixes PR13291

llvm-svn: 167948
---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 60 ++++++++++++++++++++++++++++-
 llvm/lib/Target/NVPTX/NVPTXISelLowering.h   |  3 ++
 llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll | 26 +++++++++++++
 3 files changed, 87 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index be771e3..8430e21 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -174,10 +174,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 
   // PTX does not support load / store predicate registers
-  setOperationAction(ISD::LOAD, MVT::i1, Expand);
+  setOperationAction(ISD::LOAD, MVT::i1, Custom);
+  setOperationAction(ISD::STORE, MVT::i1, Custom);
+
   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
-  setOperationAction(ISD::STORE, MVT::i1, Expand);
   setTruncStoreAction(MVT::i64, MVT::i1, Expand);
   setTruncStoreAction(MVT::i32, MVT::i1, Expand);
   setTruncStoreAction(MVT::i16, MVT::i1, Expand);
@@ -856,11 +857,66 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::EXTRACT_SUBVECTOR:
     return Op;
   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+  case ISD::STORE: return LowerSTORE(Op, DAG);
+  case ISD::LOAD: return LowerLOAD(Op, DAG);
   default:
     llvm_unreachable("Custom lowering not defined for operation");
   }
 }
 
+
+// Custom-lower a non-extending i1 load; PTX cannot load predicate registers.
+//   v = ld i1* addr
+//     =>
+//   v1 = ld i8* addr;  v = trunc v1 to i1
+SDValue NVPTXTargetLowering::
+LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+  SDNode *Node = Op.getNode();
+  LoadSDNode *LD = cast<LoadSDNode>(Node);
+  DebugLoc dl = Node->getDebugLoc();
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+  assert(ExtType == ISD::NON_EXTLOAD) ;
+  EVT VT = Node->getValueType(0);
+  assert(VT == MVT::i1 && "Custom lowering for i1 load only");
+  SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
+                 LD->getPointerInfo(),
+                 LD->isVolatile(), LD->isNonTemporal(),
+                 LD->isInvariant(),
+                 LD->getAlignment());
+  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
+  // The legalizer (the caller) expects two values from the legalized load:
+  // the i1 result and the output chain of the new i8 load, so users of the
+  // old load's chain stay ordered after the replacement memory access.
+  SDValue Ops[] = {result, newLD.getValue(1)};
+  return DAG.getMergeValues(Ops, 2, dl);
+}
+
+// Custom-lower an i1 store; PTX cannot store predicate registers.
+//   st i1 v, addr
+//     =>
+//   v1 = zext v to i8;  st i8 v1, addr
+SDValue NVPTXTargetLowering::
+LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  SDNode *Node = Op.getNode();
+  DebugLoc dl = Node->getDebugLoc();
+  StoreSDNode *ST = cast<StoreSDNode>(Node);
+  SDValue Tmp1 = ST->getChain();
+  SDValue Tmp2 = ST->getBasePtr();
+  SDValue Tmp3 = ST->getValue();
+  EVT VT = Tmp3.getValueType();
+  assert(VT == MVT::i1 && "Custom lowering for i1 store only");
+  unsigned Alignment = ST->getAlignment();
+  bool isVolatile = ST->isVolatile();
+  bool isNonTemporal = ST->isNonTemporal();
+  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl,
+                     MVT::i8, Tmp3);
+  SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
+                                ST->getPointerInfo(), isVolatile,
+                                isNonTemporal, Alignment);
+  return Result;
+}
+
+
 SDValue
 NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
                                 EVT v) const {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 86246e6..94a177c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -138,6 +138,9 @@ private:
   SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
 
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
 };
 } // namespace llvm
 
diff --git a/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll b/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll
new file mode 100644
index 0000000..779f779
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+define ptx_kernel void @t1(i1* %a) {
+; PTX32: mov.u16 %rc{{[0-9]+}}, 0;
+; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}};
+; PTX64: mov.u16 %rc{{[0-9]+}}, 0;
+; PTX64-NEXT: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}};
+  store i1 false, i1* %a
+  ret void
+}
+
+
+define ptx_kernel void @t2(i1* %a, i8* %b) {
+; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: and.b16 temp, %rc{{[0-9]+}}, 1;
+; PTX32: setp.b16.eq %p{{[0-9]+}}, temp, 1;
+; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: and.b16 temp, %rc{{[0-9]+}}, 1;
+; PTX64: setp.b16.eq %p{{[0-9]+}}, temp, 1;
+
+  %t1 = load i1* %a
+  %t2 = select i1 %t1, i8 1, i8 2
+  store i8 %t2, i8* %b
+  ret void
+}
-- 
2.7.4