From: Tom Stellard Date: Wed, 25 Jul 2012 19:17:16 +0000 (+0000) Subject: radeon/llvm: Add live-in registers during DAG lowering X-Git-Tag: accepted/2.0alpha-wayland/20121114.171706~556 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=40c41fe890e53d99afb4e2c3fbf10043081edd9e;p=profile%2Fivi%2Fmesa.git radeon/llvm: Add live-in registers during DAG lowering Pseudo instructions emulating live-in registers have been removed and their corresponding intrinsics are now being lowered during DAG lowering. --- diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp index 568c608..2e67828 100644 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp @@ -15,6 +15,7 @@ #include "AMDILIntrinsicInfo.h" #include "AMDGPUUtil.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" @@ -317,6 +318,21 @@ void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI, AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, MI->getOperand(0).getReg()); } +SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, + const TargetRegisterClass *RC, + unsigned Reg, EVT VT) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned VirtualRegister; + if (!MRI.isLiveIn(Reg)) { + VirtualRegister = MRI.createVirtualRegister(RC); + MRI.addLiveIn(Reg, VirtualRegister); + } else { + VirtualRegister = MRI.getLiveInVirtReg(Reg); + } + return DAG.getRegister(VirtualRegister, VT); +} + #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h index 359f795..b05de38 100644 ---
a/src/gallium/drivers/radeon/AMDGPUISelLowering.h +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h @@ -39,6 +39,12 @@ protected: MachineRegisterInfo & MRI, const TargetInstrInfo * TII, unsigned reg) const; + /// CreateLiveInRegister - Helper function that adds Reg to the LiveIn list + /// of the DAG's MachineFunction. This returns a Register SDNode representing + /// Reg. + SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, + unsigned Reg, EVT VT) const; + bool isHWTrueValue(SDValue Op) const; bool isHWFalseValue(SDValue Op) const; diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.cpp b/src/gallium/drivers/radeon/AMDGPUUtil.cpp index b571f4b..8ad4ed5 100644 --- a/src/gallium/drivers/radeon/AMDGPUUtil.cpp +++ b/src/gallium/drivers/radeon/AMDGPUUtil.cpp @@ -32,7 +32,6 @@ bool AMDGPU::isPlaceHolderOpcode(unsigned opcode) switch (opcode) { default: return false; case AMDGPU::RETURN: - case AMDGPU::LOAD_INPUT: case AMDGPU::LAST: case AMDGPU::MASK_WRITE: case AMDGPU::RESERVE_REG: diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp index 4f2bfe3..bfc9227 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp @@ -38,6 +38,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::FSUB, MVT::f32, Expand); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::ROTL, MVT::i32, Custom); @@ -58,24 +59,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( switch (MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); - case AMDGPU::TGID_X: - addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X); - break; - case AMDGPU::TGID_Y: - addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y); - break; - case AMDGPU::TGID_Z: - addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z); - break; - 
case AMDGPU::TIDIG_X: - addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X); - break; - case AMDGPU::TIDIG_Y: - addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y); - break; - case AMDGPU::TIDIG_Z: - addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z); - break; case AMDGPU::NGROUPS_X: lowerImplicitParameter(MI, *BB, MRI, 0); break; @@ -135,14 +118,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( break; } - case AMDGPU::LOAD_INPUT: - { - int64_t RegIndex = MI->getOperand(1).getImm(); - addLiveIn(MI, MF, MRI, TII, - AMDGPU::R600_TReg32RegClass.getRegister(RegIndex)); - break; - } - case AMDGPU::MASK_WRITE: { unsigned maskedRegister = MI->getOperand(0).getReg(); @@ -264,6 +239,8 @@ void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBl // Custom DAG Lowering Operations //===----------------------------------------------------------------------===// +using namespace llvm::Intrinsic; +using namespace llvm::AMDGPUIntrinsic; SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -288,11 +265,47 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const } return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2)); } - default: return SDValue(); + // default for switch(IntrinsicID) + default: break; } + // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode()) break; } + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntrinsicID = + cast(Op.getOperand(0))->getZExtValue(); + EVT VT = Op.getValueType(); + switch(IntrinsicID) { + default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); + case AMDGPUIntrinsic::R600_load_input: { + int64_t RegIndex = cast(Op.getOperand(1))->getZExtValue(); + unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); + return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT); + } + case r600_read_tgid_x: + return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T1_X, VT); + case r600_read_tgid_y: + return 
CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T1_Y, VT); + case r600_read_tgid_z: + return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T1_Z, VT); + case r600_read_tidig_x: + return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T0_X, VT); + case r600_read_tidig_y: + return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T0_Y, VT); + case r600_read_tidig_z: + return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T0_Z, VT); + } + // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode()) + break; } + } // end switch(Op.getOpcode()) + return SDValue(); } SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index 57bb6cb..45598a6 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -1105,14 +1105,6 @@ class R600PreloadInst : AMDGPUInst < [(set R600_TReg32:$dst, (intr))] >; -def TGID_X : R600PreloadInst <"TGID_X", int_r600_read_tgid_x>; -def TGID_Y : R600PreloadInst <"TGID_Y", int_r600_read_tgid_y>; -def TGID_Z : R600PreloadInst <"TGID_Z", int_r600_read_tgid_z>; - -def TIDIG_X : R600PreloadInst <"TIDIG_X", int_r600_read_tidig_x>; -def TIDIG_Y : R600PreloadInst <"TIDIG_Y", int_r600_read_tidig_y>; -def TIDIG_Z : R600PreloadInst <"TIDIG_Z", int_r600_read_tidig_z>; - def NGROUPS_X : R600PreloadInst <"NGROUPS_X", int_r600_read_ngroups_x>; def NGROUPS_Y : R600PreloadInst <"NGROUPS_Y", int_r600_read_ngroups_y>; def NGROUPS_Z : R600PreloadInst <"NGROUPS_Z", int_r600_read_ngroups_z>; @@ -1138,13 +1130,6 @@ def R600_LOAD_CONST : AMDGPUShaderInst < [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))] >; -def LOAD_INPUT : AMDGPUShaderInst < - (outs R600_Reg32:$dst), - (ins i32imm:$src), - "LOAD_INPUT $dst, $src", - [(set R600_Reg32:$dst, (int_R600_load_input imm:$src))] ->; - 
def RESERVE_REG : AMDGPUShaderInst < (outs), (ins i32imm:$src), diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp index 3e08e88..270e4a1 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.cpp +++ b/src/gallium/drivers/radeon/SIISelLowering.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "SIISelLowering.h" +#include "AMDILIntrinsicInfo.h" #include "SIInstrInfo.h" #include "SIRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -44,6 +45,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::BR_CC, MVT::i32, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); @@ -128,10 +131,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( lowerUSE_SGPR(MI, BB->getParent(), MRI); MI->eraseFromParent(); break; - case AMDGPU::VS_LOAD_BUFFER_INDEX: - addLiveIn(MI, BB->getParent(), MRI, TII, AMDGPU::VGPR0); - MI->eraseFromParent(); - break; } return BB; } @@ -241,7 +240,20 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND); + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntrinsicID = + cast(Op.getOperand(0))->getZExtValue(); + EVT VT = Op.getValueType(); + switch (IntrinsicID) { + case AMDGPUIntrinsic::SI_vs_load_buffer_index: + return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass, + AMDGPU::VGPR0, VT); + default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); + } + break; + } } + return SDValue(); } /// Loweri1ContextSwitch - The function is for lowering i1 operations on the diff --git a/src/gallium/drivers/radeon/SIInstructions.td 
b/src/gallium/drivers/radeon/SIInstructions.td index a91606e..aad2ade 100644 --- a/src/gallium/drivers/radeon/SIInstructions.td +++ b/src/gallium/drivers/radeon/SIInstructions.td @@ -970,15 +970,6 @@ def USE_SGPR_64 : InstSI < [(set (i64 SReg_64:$dst), (load_user_sgpr imm:$src0))] >; -def VS_LOAD_BUFFER_INDEX : InstSI < - (outs VReg_32:$dst), - (ins), - "VS_LOAD_BUFFER_INDEX", - [(set VReg_32:$dst, (int_SI_vs_load_buffer_index))]> { - - field bits<32> Inst = 0; -} - } // end usesCustomInserter // SI Psuedo branch instructions. These are used by the CFG structurizer pass @@ -1058,13 +1049,6 @@ def : Pat < (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg) >; -/* -def : Pat< - (int_SI_vs_load_buffer_index), - (COPY_TO_REGCLASS (f32 VGPR0), VReg_32) ->; -*/ - /********** ===================== **********/ /********** Interpolation Paterns **********/ /********** ===================== **********/ diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td index 95273a2..6eadc94 100644 --- a/src/gallium/drivers/radeon/SIIntrinsics.td +++ b/src/gallium/drivers/radeon/SIIntrinsics.td @@ -17,7 +17,7 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; /* XXX: We may need a seperate intrinsic here for loading integer values */ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>; - def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], []>; + def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>; def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ; def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>; diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index 
ae8c9a9..4f76224 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -229,8 +229,9 @@ static void declare_input_vs( /* Load the buffer index is always, which is always stored in VGPR0 * for Vertex Shaders */ - buffer_index_reg = lp_build_intrinsic(base->gallivm->builder, - "llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0); + buffer_index_reg = build_intrinsic(base->gallivm->builder, + "llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0, + LLVMReadNoneAttribute); vec4_type = LLVMVectorType(base->elem_type, 4); args[0] = t_list;