From: Tom Stellard
Date: Tue, 8 Aug 2017 04:57:55 +0000 (+0000)
Subject: AMDGPU: Move R600 parts of AMDGPUISelDAGToDAG into their own class
X-Git-Tag: llvmorg-6.0.0-rc1~10641
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=20287697f8c80bb17ae1b0b29678c80272eaa52a;p=platform%2Fupstream%2Fllvm.git

AMDGPU: Move R600 parts of AMDGPUISelDAGToDAG into their own class

Summary:
This refactoring is required in order to split the R600 and GCN
tablegen files.

Reviewers: arsenm

Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, llvm-commits, t-tye

Differential Revision: https://reviews.llvm.org/D36286

llvm-svn: 310336
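In outline: the shared SelectBuildVector helper stays in AMDGPUDAGToDAGISel, the
two R600-only addressing-mode selectors become virtual hooks, and a new
R600DAGToDAGISel subclass overrides them and takes over Select() for R600's
vector nodes. A minimal sketch of the resulting class shape (condensed from the
patch below; unrelated members elided):

    class AMDGPUDAGToDAGISel : public SelectionDAGISel {
    protected:
      // Shared helper; callers pick the target-appropriate register class.
      void SelectBuildVector(SDNode *N, unsigned RegClassID);
      // R600-only addressing modes, now overridable.
      virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
      virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
    };

    class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
    public:
      void Select(SDNode *N) override;  // R600 BUILD_VECTOR handling lives here
      bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset) override;
      bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset) override;
    };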
---

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index e681874..36e1b72 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -34,6 +34,7 @@ FunctionPass *createR600ClauseMergePass();
 FunctionPass *createR600Packetizer();
 FunctionPass *createR600ControlFlowFinalizer();
 FunctionPass *createAMDGPUCFGStructurizerPass();
+FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);
 
 // SI Passes
 FunctionPass *createSIAnnotateControlFlowPass();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 9db5121..6ecbe27 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -88,6 +88,9 @@ public:
   StringRef getPassName() const override;
   void PostprocessISelDAG() override;
 
+protected:
+  void SelectBuildVector(SDNode *N, unsigned RegClassID);
+
 private:
   std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
   bool isNoNanSrc(SDValue N) const;
@@ -106,8 +109,8 @@ private:
   bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
   bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                        SDValue& Offset);
-  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
-  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
   bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                        unsigned OffsetBits) const;
   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
@@ -207,10 +210,24 @@ private:
   void SelectBRCOND(SDNode *N);
   void SelectATOMIC_CMP_SWAP(SDNode *N);
 
+protected:
 // Include the pieces autogenerated from the target description.
 #include "AMDGPUGenDAGISel.inc"
 };
 
+class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
+public:
+  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
+      AMDGPUDAGToDAGISel(TM, OptLevel) {}
+
+  void Select(SDNode *N) override;
+
+  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
+                          SDValue &Offset) override;
+  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+                          SDValue &Offset) override;
+};
+
 } // end anonymous namespace
 
 INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
@@ -226,6 +243,13 @@ FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
   return new AMDGPUDAGToDAGISel(TM, OptLevel);
 }
 
+/// \brief This pass converts a legalized DAG into an R600-specific
+/// DAG, ready for instruction scheduling.
+FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
+                                      CodeGenOpt::Level OptLevel) {
+  return new R600DAGToDAGISel(TM, OptLevel);
+}
+
 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
   Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
   return SelectionDAGISel::runOnMachineFunction(MF);
@@ -304,8 +328,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
 }
 
 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
-  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
-      cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
+  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
     return N;
 
   const SITargetLowering& Lowering =
@@ -359,6 +382,59 @@ static bool getConstantValue(SDValue N, uint32_t &Out) {
   return false;
 }
 
+void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
+  unsigned Opc = N->getOpcode();
+  EVT VT = N->getValueType(0);
+  unsigned NumVectorElts = VT.getVectorNumElements();
+  EVT EltVT = VT.getVectorElementType();
+  const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
+  SDLoc DL(N);
+  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
+
+  if (NumVectorElts == 1) {
+    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
+                         RegClass);
+    return;
+  }
+
+  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
+                                "supported yet");
+  // 16 = Max Num Vector Elements
+  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
+  // 1 = Vector Register Class
+  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
+
+  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
+  bool IsRegSeq = true;
+  unsigned NOps = N->getNumOperands();
+  for (unsigned i = 0; i < NOps; i++) {
+    // XXX: Why is this here?
+    if (isa<RegisterSDNode>(N->getOperand(i))) {
+      IsRegSeq = false;
+      break;
+    }
+    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
+    RegSeqArgs[1 + (2 * i) + 1] =
+        CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
+                                  MVT::i32);
+  }
+
+  if (NOps != NumVectorElts) {
+    // Fill in the missing undef elements if this was a scalar_to_vector.
+    assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
+    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+                                                   DL, EltVT);
+    for (unsigned i = NOps; i < NumVectorElts; ++i) {
+      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
+      RegSeqArgs[1 + (2 * i) + 1] =
+          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
+    }
+  }
+
+  if (!IsRegSeq)
+    SelectCode(N);
+  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
+}
+
 void AMDGPUDAGToDAGISel::Select(SDNode *N) {
   unsigned int Opc = N->getOpcode();
   if (N->isMachineOpcode()) {
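A note on the RegSeqArgs array built by SelectBuildVector above: it is laid out
exactly as the REG_SEQUENCE pseudo expects its operands, i.e. the register class
ID first, then one (value, subregister index) pair per element. Illustration for
a two-element vector (layout summary only, not code from the patch; the
subregister indices are assumed to be the usual per-channel sub0/sub1):

    // RegSeqArgs[0]      -> RegClassID, e.g. AMDGPU::R600_Reg64RegClassID
    // RegSeqArgs[1], [2] -> N->getOperand(0), TRI->getSubRegFromChannel(0) // sub0
    // RegSeqArgs[3], [4] -> N->getOperand(1), TRI->getSubRegFromChannel(1) // sub1
    //
    // For SCALAR_TO_VECTOR, NOps < NumVectorElts, so the remaining lanes are
    // padded with one shared IMPLICIT_DEF value plus their subreg indices.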
@@ -381,8 +457,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
   case ISD::SUB:
   case ISD::SUBC:
   case ISD::SUBE: {
-    if (N->getValueType(0) != MVT::i64 ||
-        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+    if (N->getValueType(0) != MVT::i64)
       break;
 
     SelectADD_SUB_I64(N);
@@ -403,10 +478,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
   }
 
   case ISD::SCALAR_TO_VECTOR:
-  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
   case ISD::BUILD_VECTOR: {
-    unsigned RegClassID;
-    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
     EVT VT = N->getValueType(0);
     unsigned NumVectorElts = VT.getVectorNumElements();
     EVT EltVT = VT.getVectorElementType();
@@ -427,80 +499,12 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
     }
 
     assert(EltVT.bitsEq(MVT::i32));
-
-    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
-    } else {
-      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
-      // that adds a 128 bits reg copy when going through TwoAddressInstructions
-      // pass. We want to avoid 128 bits copies as much as possible because they
-      // can't be bundled by our scheduler.
-      switch(NumVectorElts) {
-      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
-      case 4:
-        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
-          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
-        else
-          RegClassID = AMDGPU::R600_Reg128RegClassID;
-        break;
-      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
-      }
-    }
-
-    SDLoc DL(N);
-    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
-
-    if (NumVectorElts == 1) {
-      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
-                           RegClass);
-      return;
-    }
-
-    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
-                                  "supported yet");
-    // 16 = Max Num Vector Elements
-    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
-    // 1 = Vector Register Class
-    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
-
-    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
-    bool IsRegSeq = true;
-    unsigned NOps = N->getNumOperands();
-    for (unsigned i = 0; i < NOps; i++) {
-      // XXX: Why is this here?
-      if (isa<RegisterSDNode>(N->getOperand(i))) {
-        IsRegSeq = false;
-        break;
-      }
-      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
-      RegSeqArgs[1 + (2 * i) + 1] =
-          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
-                                    MVT::i32);
-    }
-
-    if (NOps != NumVectorElts) {
-      // Fill in the missing undef elements if this was a scalar_to_vector.
-      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
-
-      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
-                                                     DL, EltVT);
-      for (unsigned i = NOps; i < NumVectorElts; ++i) {
-        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
-        RegSeqArgs[1 + (2 * i) + 1] =
-            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
-      }
-    }
-
-    if (!IsRegSeq)
-      break;
-    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
+    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
+    SelectBuildVector(N, RegClassID);
     return;
   }
   case ISD::BUILD_PAIR: {
     SDValue RC, SubReg0, SubReg1;
-    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
-      break;
-    }
     SDLoc DL(N);
     if (N->getValueType(0) == MVT::i128) {
       RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
@@ -522,8 +526,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
 
   case ISD::Constant:
   case ISD::ConstantFP: {
-    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
-        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
+    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;
 
     uint64_t Imm;
@@ -558,9 +561,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
 
   case AMDGPUISD::BFE_I32:
   case AMDGPUISD::BFE_U32: {
-    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
-      break;
-
     // There is a scalar version available, but unlike the vector version which
     // has a separate operand for the offset and width, the scalar version packs
     // the width and offset into a single operand. Try to move to the scalar
@@ -600,8 +600,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
   case ISD::SRL:
   case ISD::SRA:
   case ISD::SIGN_EXTEND_INREG:
-    if (N->getValueType(0) != MVT::i32 ||
-        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+    if (N->getValueType(0) != MVT::i32)
       break;
 
     SelectS_BFE(N);
@@ -663,32 +662,8 @@ bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
 }
 
 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
-                                            SDValue &Offset) {
-  ConstantSDNode *IMMOffset;
-
-  if (Addr.getOpcode() == ISD::ADD
-      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
-      && isInt<16>(IMMOffset->getZExtValue())) {
-
-    Base = Addr.getOperand(0);
-    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
-                                       MVT::i32);
-    return true;
-  // If the pointer address is constant, we can move it to the offset field.
-  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
-             && isInt<16>(IMMOffset->getZExtValue())) {
-    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
-                                  SDLoc(CurDAG->getEntryNode()),
-                                  AMDGPU::ZERO, MVT::i32);
-    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
-                                       MVT::i32);
-    return true;
-  }
-
-  // Default case, no offset
-  Base = Addr;
-  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
-  return true;
+                                            SDValue &Offset) {
+  return false;
 }
 
 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
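Stubbing the base SelectADDRVTX_READ to return false is deliberate: ADDRVTX_READ
is a ComplexPattern used only by R600 vertex-fetch patterns, and until the
tablegen files are actually split, the shared generated matcher can still call
this hook on the GCN path; always failing the match there is exactly what is
wanted. The same virtual-dispatch idea is what replaces the getGeneration()
checks deleted throughout Select() above. A self-contained toy illustration of
that transformation (hypothetical names, not patch code):

    #include <cstdio>

    struct ISel {                  // base selector: GCN behavior after the split
      virtual ~ISel() = default;
      virtual bool selectADDRVTX_READ() { return false; } // reject R600-only pattern
    };

    struct R600ISel : ISel {       // subclass keeps the old R600 logic
      bool selectADDRVTX_READ() override { return true; } // would decompose Addr
    };

    int main() {
      R600ISel R600;
      ISel GCN;
      ISel *Sel = &R600;           // chosen once per target, at pass construction
      std::printf("%d %d\n", Sel->selectADDRVTX_READ(), GCN.selectADDRVTX_READ());
      // prints "1 0": no per-node generation checks needed anymore
    }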
@@ -1956,3 +1931,93 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
     CurDAG->RemoveDeadNodes();
   } while (IsModified);
 }
+
+void R600DAGToDAGISel::Select(SDNode *N) {
+  unsigned int Opc = N->getOpcode();
+  if (N->isMachineOpcode()) {
+    N->setNodeId(-1);
+    return;   // Already selected.
+  }
+
+  switch (Opc) {
+  default: break;
+  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
+  case ISD::SCALAR_TO_VECTOR:
+  case ISD::BUILD_VECTOR: {
+    EVT VT = N->getValueType(0);
+    unsigned NumVectorElts = VT.getVectorNumElements();
+    unsigned RegClassID;
+    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+    // that adds a 128 bits reg copy when going through TwoAddressInstructions
+    // pass. We want to avoid 128 bits copies as much as possible because they
+    // can't be bundled by our scheduler.
+    switch(NumVectorElts) {
+    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
+    case 4:
+      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+        RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
+      else
+        RegClassID = AMDGPU::R600_Reg128RegClassID;
+      break;
+    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+    }
+    SelectBuildVector(N, RegClassID);
+    return;
+  }
+  }
+
+  SelectCode(N);
+}
+
+bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+                                          SDValue &Offset) {
+  ConstantSDNode *C;
+  SDLoc DL(Addr);
+
+  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
+    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
+             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
+    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+    Base = Addr.getOperand(0);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+  } else {
+    Base = Addr;
+    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+  }
+
+  return true;
+}
+
+bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+                                          SDValue &Offset) {
+  ConstantSDNode *IMMOffset;
+
+  if (Addr.getOpcode() == ISD::ADD
+      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+      && isInt<16>(IMMOffset->getZExtValue())) {
+
+    Base = Addr.getOperand(0);
+    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+                                       MVT::i32);
+    return true;
+  // If the pointer address is constant, we can move it to the offset field.
+  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
+             && isInt<16>(IMMOffset->getZExtValue())) {
+    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+                                  SDLoc(CurDAG->getEntryNode()),
+                                  AMDGPU::ZERO, MVT::i32);
+    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+                                       MVT::i32);
+    return true;
+  }
+
+  // Default case, no offset
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+  return true;
+}
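For quick reference, the address shapes accepted by the R600 SelectADDRIndirect
override above (a summary of the code just added; the OR form relies on the
usual assumption that an OR of disjoint bits behaves like an ADD):

    // (constant C)             -> Base = INDIRECT_BASE_ADDR, Offset = C
    // (DWORDADDR (constant C)) -> Base = INDIRECT_BASE_ADDR, Offset = C
    // (add x, (constant C))    -> Base = x,                  Offset = C
    // (or  x, (constant C))    -> Base = x,                  Offset = C
    // anything else            -> Base = Addr,               Offset = 0
    // Every case returns true, so this ComplexPattern never fails to match.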
+ } + + switch (Opc) { + default: break; + case AMDGPUISD::BUILD_VERTICAL_VECTOR: + case ISD::SCALAR_TO_VECTOR: + case ISD::BUILD_VECTOR: { + EVT VT = N->getValueType(0); + unsigned NumVectorElts = VT.getVectorNumElements(); + unsigned RegClassID; + // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG + // that adds a 128 bits reg copy when going through TwoAddressInstructions + // pass. We want to avoid 128 bits copies as much as possible because they + // can't be bundled by our scheduler. + switch(NumVectorElts) { + case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; + case 4: + if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) + RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; + else + RegClassID = AMDGPU::R600_Reg128RegClassID; + break; + default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); + } + SelectBuildVector(N, RegClassID); + return; + } + } + + SelectCode(N); +} + +bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, + SDValue &Offset) { + ConstantSDNode *C; + SDLoc DL(Addr); + + if ((C = dyn_cast(Addr))) { + Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); + Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); + } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && + (C = dyn_cast(Addr.getOperand(0)))) { + Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); + Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); + } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && + (C = dyn_cast(Addr.getOperand(1)))) { + Base = Addr.getOperand(0); + Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); + } else { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); + } + + return true; +} + +bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, + SDValue &Offset) { + ConstantSDNode *IMMOffset; + + if (Addr.getOpcode() == ISD::ADD + && (IMMOffset = dyn_cast(Addr.getOperand(1))) + && isInt<16>(IMMOffset->getZExtValue())) { + + Base = Addr.getOperand(0); + Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), + MVT::i32); + return true; + // If the pointer address is constant, we can move it to the offset field. + } else if ((IMMOffset = dyn_cast(Addr)) + && isInt<16>(IMMOffset->getZExtValue())) { + Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), + SDLoc(CurDAG->getEntryNode()), + AMDGPU::ZERO, MVT::i32); + Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), + MVT::i32); + return true; + } + + // Default case, no offset + Base = Addr; + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + return true; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 0511bb1..96e40cb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -488,6 +488,7 @@ public: } bool addPreISel() override; + bool addInstSelector() override; void addPreRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; @@ -660,6 +661,11 @@ bool R600PassConfig::addPreISel() { return false; } +bool R600PassConfig::addInstSelector() { + addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel())); + return false; +} + void R600PassConfig::addPreRegAlloc() { addPass(createR600VectorRegMerger()); }