// SI Passes
FunctionPass *createSIAnnotateControlFlowPass();
-FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIInsertWaits(TargetMachine &tm);
SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
- OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
+ OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
}
}
// Adds the AMDGPU convert-to-ISA pass to the pass pipeline and returns false.
// NOTE(review): judging by the name this hook runs before register
// allocation; the meaning of the return value is not visible here -- confirm
// against the base pass-config class.
bool AMDGPUPassConfig::addPreRegAlloc() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-
- if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
- addPass(createSIAssignInterpRegsPass(*TM));
- }
addPass(createAMDGPUConvertToISAPass(*TM));
return false;
}
R600MachineScheduler.cpp
R600RegisterInfo.cpp
SIAnnotateControlFlow.cpp
- SIAssignInterpRegs.cpp
SIInsertWaits.cpp
SIInstrInfo.cpp
SIISelLowering.cpp
+++ /dev/null
-//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief This pass maps the pseudo interpolation registers to the correct physical
-/// registers.
-//
-/// Prior to executing a fragment shader, the GPU loads interpolation
-/// parameters into physical registers. The specific physical register that each
-/// interpolation parameter ends up in depends on the type of the interpolation
-/// parameter as well as how many interpolation parameters are used by the
-/// shader.
-//
-//===----------------------------------------------------------------------===//
-
-
-
-#include "AMDGPU.h"
-#include "AMDIL.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-namespace {
-
-class SIAssignInterpRegsPass : public MachineFunctionPass {
-
-private:
- static char ID;
- TargetMachine &TM;
-
- void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
- unsigned physReg, unsigned virtReg);
-
-public:
- SIAssignInterpRegsPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TM(tm) { }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const { return "SI Assign intrpolation registers"; }
-};
-
-} // End anonymous namespace
-
-char SIAssignInterpRegsPass::ID = 0;
-
-#define INTERP_VALUES 16
-#define REQUIRED_VALUE_MAX_INDEX 7
-
-struct InterpInfo {
- bool Enabled;
- unsigned Regs[3];
- unsigned RegCount;
-};
-
-
-FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
- return new SIAssignInterpRegsPass(tm);
-}
-
-bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF) {
-
- struct InterpInfo InterpUse[INTERP_VALUES] = {
- {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
- {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
- {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
- {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
- {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
- {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
- {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
- {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
- {false, {AMDGPU::POS_X_FLOAT}, 1},
- {false, {AMDGPU::POS_Y_FLOAT}, 1},
- {false, {AMDGPU::POS_Z_FLOAT}, 1},
- {false, {AMDGPU::POS_W_FLOAT}, 1},
- {false, {AMDGPU::FRONT_FACE}, 1},
- {false, {AMDGPU::ANCILLARY}, 1},
- {false, {AMDGPU::SAMPLE_COVERAGE}, 1},
- {false, {AMDGPU::POS_FIXED_PT}, 1}
- };
-
- SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
- // This pass is only needed for pixel shaders.
- if (MFI->ShaderType != ShaderType::PIXEL) {
- return false;
- }
- MachineRegisterInfo &MRI = MF.getRegInfo();
- bool ForceEnable = true;
-
- // First pass, mark the interpolation values that are used.
- for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
- for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
- RegIdx++) {
- InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled ||
- !MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]);
- if (InterpUse[InterpIdx].Enabled &&
- InterpIdx <= REQUIRED_VALUE_MAX_INDEX) {
- ForceEnable = false;
- }
- }
- }
-
- // At least one interpolation mode must be enabled or else the GPU will hang.
- if (ForceEnable) {
- InterpUse[0].Enabled = true;
- }
-
- unsigned UsedVgprs = 0;
-
- // Second pass, replace with VGPRs.
- for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
- if (!InterpUse[InterpIdx].Enabled) {
- continue;
- }
- MFI->SPIPSInputAddr |= (1 << InterpIdx);
-
- for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
- RegIdx++, UsedVgprs++) {
- unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs);
- unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg);
- addLiveIn(&MF, MRI, NewReg, VirtReg);
- }
- }
-
- return false;
-}
-
-void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF,
- MachineRegisterInfo & MRI,
- unsigned physReg, unsigned virtReg) {
- const TargetInstrInfo * TII = TM.getInstrInfo();
- if (!MRI.isLiveIn(physReg)) {
- MRI.addLiveIn(physReg, virtReg);
- MF->front().addLiveIn(physReg);
- BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
- TII->get(TargetOpcode::COPY), virtReg)
- .addReg(physReg);
- } else {
- MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
- }
-}
#include "SIISelLowering.h"
#include "AMDIL.h"
+#include "AMDGPU.h"
#include "AMDILIntrinsicInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
MachineFunction &MF = DAG.getMachineFunction();
FunctionType *FType = MF.getFunction()->getFunctionType();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
assert(CallConv == CallingConv::C);
SmallVector<ISD::InputArg, 16> Splits;
- for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ uint32_t Skipped = 0;
+
+ for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
- // Split vertices into their elements
+ // First check if it's a PS input addr
+ if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg()) {
+
+ assert((PSInputNum <= 15) && "Too many PS inputs!");
+
+ if (!Arg.Used) {
+ // We can safely skip unused PS inputs
+ Skipped |= 1 << i;
+ ++PSInputNum;
+ continue;
+ }
+
+ Info->PSInputAddr |= 1 << PSInputNum++;
+ }
+
+ // Second, split vector arguments into their elements
if (Arg.VT.isVector()) {
ISD::InputArg NewArg = Arg;
NewArg.Flags.setSplit();
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
+ // At least one interpolation mode must be enabled or else the GPU will hang.
+ if (Info->ShaderType == ShaderType::PIXEL && (Info->PSInputAddr & 0x7F) == 0) {
+ Info->PSInputAddr |= 1;
+ CCInfo.AllocateReg(AMDGPU::VGPR0);
+ CCInfo.AllocateReg(AMDGPU::VGPR1);
+ }
+
AnalyzeFormalArguments(CCInfo, Splits);
for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
+ if (Skipped & (1 << i)) {
+ InVals.push_back(SDValue());
+ continue;
+ }
+
CCValAssign &VA = ArgLocs[ArgIdx++];
assert(VA.isRegLoc() && "Parameter must be in a register!");
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::BRANCH: return BB;
- case AMDGPU::SI_INTERP:
- LowerSI_INTERP(MI, *BB, I, MRI);
- break;
case AMDGPU::SI_WQM:
LowerSI_WQM(MI, *BB, I, MRI);
break;
MI->eraseFromParent();
}
-void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
- unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
- MachineOperand dst = MI->getOperand(0);
- MachineOperand iReg = MI->getOperand(1);
- MachineOperand jReg = MI->getOperand(2);
- MachineOperand attr_chan = MI->getOperand(3);
- MachineOperand attr = MI->getOperand(4);
- MachineOperand params = MI->getOperand(5);
-
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
- .addOperand(params);
-
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
- .addOperand(iReg)
- .addOperand(attr_chan)
- .addOperand(attr)
- .addReg(M0);
-
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
- .addOperand(dst)
- .addReg(tmp)
- .addOperand(jReg)
- .addOperand(attr_chan)
- .addOperand(attr)
- .addReg(M0);
-
- MI->eraseFromParent();
-}
-
// Result type for SETCC on this target: always MVT::i1. The VT argument is
// not consulted.
EVT SITargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i1;
}
const SIInstrInfo * TII;
const TargetRegisterInfo * TRI;
- void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, unsigned Opocde) const;
- void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
let isCodeGenOnly = 1, isPseudo = 1 in {
-def SET_M0 : InstSI <
- (outs SReg_32:$dst),
- (ins i32imm:$src0),
- "SET_M0 $dst, $src0",
- [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))]
->;
-
def LOAD_CONST : AMDGPUShaderInst <
(outs GPRF32:$dst),
(ins i32imm:$src),
let usesCustomInserter = 1 in {
-def SI_INTERP : InstSI <
- (outs VReg_32:$dst),
- (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
- "SI_INTERP $dst, $i, $j, $attr_chan, $attr, $params",
- []
->;
-
def SI_WQM : InstSI <
(outs),
(ins),
/********** ===================== **********/
// Select the fs_constant intrinsic to V_INTERP_MOV_F32 with INTERP.P0 as the
// first operand; attr_chan, attr and params are forwarded unchanged.
def : Pat <
- (int_SI_fs_interp_constant imm:$attr_chan, imm:$attr, M0Reg:$params),
+ (int_SI_fs_constant imm:$attr_chan, imm:$attr, M0Reg:$params),
(V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, M0Reg:$params)
>;
// Select int_SI_fs_interp to a V_INTERP_P1_F32 -> V_INTERP_P2_F32 chain.
// P1 consumes sub0 of the VReg_64 $ij operand; P2 consumes P1's result and
// sub1 of $ij. Both instructions receive the same attr_chan, attr and params
// operands.
def : Pat <
- (int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params),
- (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan,
- imm:$attr, SReg_32:$params)
->;
-
-def : Pat <
- (int_SI_fs_interp_linear_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
- (SI_INTERP (f32 LINEAR_CENTROID_I), (f32 LINEAR_CENTROID_J), imm:$attr_chan,
- imm:$attr, SReg_32:$params)
->;
-
-def : Pat <
- (int_SI_fs_interp_persp_center imm:$attr_chan, imm:$attr, SReg_32:$params),
- (SI_INTERP (f32 PERSP_CENTER_I), (f32 PERSP_CENTER_J), imm:$attr_chan,
- imm:$attr, SReg_32:$params)
->;
-
-def : Pat <
- (int_SI_fs_interp_persp_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
- (SI_INTERP (f32 PERSP_CENTROID_I), (f32 PERSP_CENTROID_J), imm:$attr_chan,
- imm:$attr, SReg_32:$params)
->;
-
-def : Pat <
- (int_SI_fs_read_face),
- (f32 FRONT_FACE)
->;
-
-def : Pat <
- (int_SI_fs_read_pos 0),
- (f32 POS_X_FLOAT)
->;
-
-def : Pat <
- (int_SI_fs_read_pos 1),
- (f32 POS_Y_FLOAT)
->;
-
-def : Pat <
- (int_SI_fs_read_pos 2),
- (f32 POS_Z_FLOAT)
->;
-
-def : Pat <
- (int_SI_fs_read_pos 3),
- (f32 POS_W_FLOAT)
+ (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, VReg_64:$ij),
+ (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG VReg_64:$ij, sub0),
+ imm:$attr_chan, imm:$attr, M0Reg:$params),
+ (EXTRACT_SUBREG VReg_64:$ij, sub1),
+ imm:$attr_chan, imm:$attr, M0Reg:$params)
>;
/********** ================== **********/
/* Interpolation Intrinsics */
- def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>;
- class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
-
- def int_SI_fs_interp_linear_center : Interp;
- def int_SI_fs_interp_linear_centroid : Interp;
- def int_SI_fs_interp_persp_center : Interp;
- def int_SI_fs_interp_persp_centroid : Interp;
- def int_SI_fs_interp_constant : Interp;
-
- def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>;
- def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
+ def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrReadMem]>;
/* Control flow Intrinsics */
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: MachineFunctionInfo(),
- SPIPSInputAddr(0),
- ShaderType(0) {
+ ShaderType(0),
+ PSInputAddr(0) {
AttributeSet Set = MF.getFunction()->getAttributes();
Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
static const char *ShaderTypeAttribute;
SIMachineFunctionInfo(const MachineFunction &MF);
- unsigned SPIPSInputAddr;
unsigned ShaderType;
+ unsigned PSInputAddr;
};
} // End namespace llvm
}
}
-// virtual Interpolation registers
-def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
-def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
-def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
-def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
-def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
-def PERSP_CENTROID_J : SIReg <"PERP_CENTROID_J">;
-def PERSP_I_W : SIReg <"PERSP_I_W">;
-def PERSP_J_W : SIReg <"PERSP_J_W">;
-def PERSP_1_W : SIReg <"PERSP_1_W">;
-def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
-def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
-def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
-def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
-def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
-def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
-def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
-def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
-def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
-def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
-def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
-def FRONT_FACE : SIReg <"FRONT_FACE">;
-def ANCILLARY : SIReg <"ANCILLARY">;
-def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
-def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
-
//===----------------------------------------------------------------------===//
// Groupings using register classes and tuples
//===----------------------------------------------------------------------===//
def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>;
-def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
- (add VReg_32, SReg_32,
- PERSP_SAMPLE_I, PERSP_SAMPLE_J,
- PERSP_CENTER_I, PERSP_CENTER_J,
- PERSP_CENTROID_I, PERSP_CENTROID_J,
- PERSP_I_W, PERSP_J_W, PERSP_1_W,
- LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
- LINEAR_CENTER_I, LINEAR_CENTER_J,
- LINEAR_CENTROID_I, LINEAR_CENTROID_J,
- LINE_STIPPLE_TEX_COORD,
- POS_X_FLOAT,
- POS_Y_FLOAT,
- POS_Z_FLOAT,
- POS_W_FLOAT,
- FRONT_FACE,
- ANCILLARY,
- SAMPLE_COVERAGE,
- POS_FIXED_PT
- )
->;
+def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>;