// R600 Passes
FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
// SI Passes
FunctionPass *createSIAnnotateControlFlowPass();
addPass(createAMDGPUCFGPreparationPass(*TM));
addPass(createAMDGPUCFGStructurizerPass(*TM));
addPass(createR600ExpandSpecialInstrsPass(*TM));
+ addPass(createR600LowerConstCopy(*TM));
addPass(&FinalizeMachineBundlesID);
} else {
addPass(createSILowerLiteralConstantsPass(*TM));
enum AddressSpaces {
PRIVATE_ADDRESS = 0, ///< Address space for private memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
CONSTANT_ADDRESS = 2, ///< Address space for constant memory.
LOCAL_ADDRESS = 3, ///< Address space for local memory.
REGION_ADDRESS = 4, ///< Address space for region memory.
ADDRESS_NONE = 5, ///< Address space for unknown memory.
PARAM_D_ADDRESS = 6, ///< Address space for direct addressable parameter memory (CONST0)
PARAM_I_ADDRESS = 7, ///< Address space for indirect addressable parameter memory (VTX1)
USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
- LAST_ADDRESS = 9
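+  // One address space per hardware constant buffer (kc_bank 0 through 15).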
+ CONSTANT_BUFFER_0 = 9,
+ CONSTANT_BUFFER_1 = 10,
+ CONSTANT_BUFFER_2 = 11,
+ CONSTANT_BUFFER_3 = 12,
+ CONSTANT_BUFFER_4 = 13,
+ CONSTANT_BUFFER_5 = 14,
+ CONSTANT_BUFFER_6 = 15,
+ CONSTANT_BUFFER_7 = 16,
+ CONSTANT_BUFFER_8 = 17,
+ CONSTANT_BUFFER_9 = 18,
+ CONSTANT_BUFFER_10 = 19,
+ CONSTANT_BUFFER_11 = 20,
+ CONSTANT_BUFFER_12 = 21,
+ CONSTANT_BUFFER_13 = 22,
+ CONSTANT_BUFFER_14 = 23,
+ CONSTANT_BUFFER_15 = 24,
+ LAST_ADDRESS = 25
};
} // namespace AMDGPUAS
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include <list>
#include <queue>
private:
inline SDValue getSmallIPtrImm(unsigned Imm);
+  bool FoldOperands(unsigned Opcode, const R600InstrInfo *TII,
+                    std::vector<SDValue> &Ops);
// Complex pattern selectors
bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
static bool isLocalLoad(const LoadSDNode *N);
static bool isRegionLoad(const LoadSDNode *N);
+ bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
+ bool SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue &BaseReg, SDValue& Offset);
bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
break;
}
}
- return SelectCode(N);
+ SDNode *Result = SelectCode(N);
+
+  // On pre-SI (R600-family) targets, try to fold constant buffer reads into
+  // the source operands of the selected ALU node.
+
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+ if (Result && TII->isALUInstr(Result->getMachineOpcode())) {
+ bool IsModified = false;
+ do {
+ std::vector<SDValue> Ops;
+        for (SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
+ I != E; ++I)
+ Ops.push_back(*I);
+ IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
+ if (IsModified) {
+ Result = CurDAG->MorphNodeTo(Result, Result->getOpcode(),
+ Result->getVTList(), Ops.data(), Ops.size());
+ }
+ } while (IsModified);
+ }
+ }
+
+ return Result;
+}
+
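+// Attempt to fold CONST_ADDRESS operands of the ALU instruction Opcode into
+// its SRCx / SRCx_SEL operand pairs. Returns true if Ops was modified.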
+bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
+ const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
+ int OperandIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1),
+ TII->getOperandIdx(Opcode, R600Operands::SRC2)
+ };
+ int SelIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL),
+ TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL)
+ };
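+  // Ops does not contain the result operand, so every machine-operand index
+  // returned by getOperandIdx is shifted down by one below.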
+ for (unsigned i = 0; i < 3; i++) {
+ if (OperandIdx[i] < 0)
+ return false;
+ SDValue Operand = Ops[OperandIdx[i] - 1];
+ switch (Operand.getOpcode()) {
+ case AMDGPUISD::CONST_ADDRESS: {
+ SDValue CstOffset;
+ if (!Operand.getValueType().isVector() &&
+ SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
+ Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+ Ops[SelIdx[i] - 1] = CstOffset;
+ return true;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ return false;
}
bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
///==== AMDGPU Functions ====///
+bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+ SDValue& IntPtr) {
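+  // The byte address built in LowerLOAD already encodes the kcache base and
+  // channel; dividing by 4 yields the final (sel << 2) | chan select value.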
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
+ IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue& BaseReg, SDValue &Offset) {
+  if (!isa<ConstantSDNode>(Addr)) {
+ BaseReg = Addr;
+ Offset = CurDAG->getIntPtrConstant(0, true);
+ return true;
+ }
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
SDValue& Offset) {
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
R600ExpandSpecialInstrs.cpp
R600InstrInfo.cpp
R600ISelLowering.cpp
+ R600LowerConstCopy.cpp
R600MachineFunctionInfo.cpp
R600RegisterInfo.cpp
SIAnnotateControlFlow.cpp
}
}
+void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+  const char *chans = "XYZW";
+ int sel = MI->getOperand(OpNo).getImm();
+
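+  // The operand encodes (sel << 2) | chan: sel >= 512 selects a constant
+  // buffer entry (ALU_CONST), sel in [448, 512) an interpolation parameter
+  // (ALU_PARAM), and lower non-negative values a GPR index.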
+ int chan = sel & 3;
+ sel >>= 2;
+
+ if (sel >= 512) {
+ sel -= 512;
+ int cb = sel >> 12;
+ sel &= 4095;
+ O << cb << "[" << sel << "]";
+ } else if (sel >= 448) {
+ sel -= 448;
+ O << sel;
+  } else if (sel >= 0) {
+ O << sel;
+ }
+
+ if (sel >= 0)
+ O << "." << chans[chan];
+}
+
#include "AMDGPUGenAsmWriter.inc"
void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
} // End namespace llvm
void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const;
void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
- void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
- raw_ostream &OS) const;
+ void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
+ raw_ostream &OS) const;
void EmitDst(const MCInst &MI, raw_ostream &OS) const;
void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const;
case AMDGPU::VTX_READ_PARAM_32_eg:
case AMDGPU::VTX_READ_GLOBAL_8_eg:
case AMDGPU::VTX_READ_GLOBAL_32_eg:
- case AMDGPU::VTX_READ_GLOBAL_128_eg: {
+ case AMDGPU::VTX_READ_GLOBAL_128_eg:
+ case AMDGPU::TEX_VTX_CONSTBUF: {
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const {
const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
- unsigned NumOperands = MI.getNumOperands();
// Emit instruction type
EmitByte(INSTR_ALU, OS);
InstWord01 |= ISAOpCode << 1;
}
- unsigned SrcIdx = 0;
- for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) {
- if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() ||
- OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) {
- continue;
- }
- EmitSrcISA(MI, OpIdx, InstWord01, OS);
- SrcIdx++;
- }
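+  // OP3 ALU instructions encode three sources, OP2 two, everything else one.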
+  unsigned SrcNum = (MCDesc.TSFlags & R600_InstFlag::OP3) ? 3 :
+                    (MCDesc.TSFlags & R600_InstFlag::OP2) ? 2 : 1;
- // Emit zeros for unused sources
- for ( ; SrcIdx < 3; SrcIdx++) {
- EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
+ EmitByte(SrcNum, OS);
+
+ const unsigned SrcOps[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL}
+ };
+
+ for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
+ unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
+ unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
+ EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
}
Emit(InstWord01, OS);
}
-void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
- uint64_t &Value, raw_ostream &OS) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
+void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
+ unsigned SelOpIdx, raw_ostream &OS) const {
+ const MCOperand &RegMO = MI.getOperand(RegOpIdx);
+ const MCOperand &SelMO = MI.getOperand(SelOpIdx);
+
union {
float f;
uint32_t i;
} InlineConstant;
InlineConstant.i = 0;
- // Emit the source select (2 bytes). For GPRs, this is the register index.
- // For other potential instruction operands, (e.g. constant registers) the
- // value of the source select is defined in the r600isa docs.
- if (MO.isReg()) {
- unsigned Reg = MO.getReg();
- if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
- EmitByte(1, OS);
- } else {
- EmitByte(0, OS);
- }
+  // Emit the source type (1 byte) and source select (4 bytes). For GPRs the
+  // type is 0 and the select is 0 (the GPR index lives in the instruction
+  // encoding); for constants the type is 1 and the select is the original
+  // const select passed from the driver.
+ unsigned Reg = RegMO.getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
+ EmitByte(1, OS);
+ uint32_t Sel = SelMO.getImm();
+ Emit(Sel, OS);
+ } else {
+ EmitByte(0, OS);
+ Emit((uint32_t)0, OS);
+ }
- if (Reg == AMDGPU::ALU_LITERAL_X) {
- unsigned ImmOpIndex = MI.getNumOperands() - 1;
- MCOperand ImmOp = MI.getOperand(ImmOpIndex);
- if (ImmOp.isFPImm()) {
- InlineConstant.f = ImmOp.getFPImm();
- } else {
- assert(ImmOp.isImm());
- InlineConstant.i = ImmOp.getImm();
- }
+ if (Reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned ImmOpIndex = MI.getNumOperands() - 1;
+ MCOperand ImmOp = MI.getOperand(ImmOpIndex);
+ if (ImmOp.isFPImm()) {
+ InlineConstant.f = ImmOp.getFPImm();
+ } else {
+ assert(ImmOp.isImm());
+ InlineConstant.i = ImmOp.getImm();
}
}
SRC0_NEG,
SRC0_REL,
SRC0_ABS,
+ SRC0_SEL,
SRC1,
SRC1_NEG,
SRC1_REL,
SRC1_ABS,
+ SRC1_SEL,
SRC2,
SRC2_NEG,
SRC2_REL,
+ SRC2_SEL,
LAST,
PRED_SEL,
IMM,
COUNT
};
+
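+  // Maps each R600Operands entry to its machine-operand index for OP1, OP2
+  // and OP3 ALU instructions; -1 marks operands the class does not have.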
+  static const int ALUOpTable[3][R600Operands::COUNT] = {
+// W C S S S S S S S S S S S
+// R O D L S R R R R S R R R R S R R R L P
+// D U I M R A R C C C C R C C C C R C C C A R I
+// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M
+// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M
+ {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12},
+    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19},
+ {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
+ };
+
}
#endif // R600DEFINES_H_
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setSchedulingPreference(Sched::VLIW);
}
break;
}
- case AMDGPU::R600_LOAD_CONST: {
- int64_t RegIndex = MI->getOperand(1).getImm();
- unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
- .addOperand(MI->getOperand(0))
- .addReg(ConstantReg);
- break;
- }
-
case AMDGPU::MASK_WRITE: {
unsigned maskedRegister = MI->getOperand(0).getReg();
assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::FPOW: return LowerFPOW(Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
switch (N->getOpcode()) {
default: return;
case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+ return;
+ case ISD::LOAD: {
+ SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
+ Results.push_back(SDValue(Node, 0));
+ Results.push_back(SDValue(Node, 1));
+    // XXX: LLVM does not seem to replace the chain value inside
+    // CustomWidenLowerNode, so do it manually here.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
+ return;
+ }
}
}
return SDValue();
}
+// Returns the base constant position, 512 + (kc_bank << 12), for the given
+// constant buffer address space, or -1 if AddressSpace is not a constant
+// buffer.
+static int ConstantAddressBlock(unsigned AddressSpace) {
+  if (AddressSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
+      AddressSpace <= AMDGPUAS::CONSTANT_BUFFER_15) {
+    return 512 + 4096 * (AddressSpace - AMDGPUAS::CONSTANT_BUFFER_0);
+  }
+  return -1;
+}
+
+SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Ptr = Op.getOperand(1);
+ SDValue LoweredLoad;
+
+ int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
+ if (ConstantBlock > -1) {
+ SDValue Result;
+    if (isa<Constant>(LoadNode->getSrcValue())) {
+ SDValue Slots[4];
+ for (unsigned i = 0; i < 4; i++) {
+        // We want the constant position encoded with the following formula:
+        //   ((512 + (kc_bank << 12) + const_index) << 2) + chan
+        // where const_index is derived from Ptr, which LLVM computes with an
+        // alignment of 16. Thus we add (512 + (kc_bank << 12)) * 16 + chan * 4
+        // here, and divide by 4 at the ISel step.
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
+ Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
+ }
+ Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
+ } else {
+      // A non-constant pointer cannot be folded; keep it as a v4i32 load.
+ Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
+ DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32))
+ );
+ }
+
+ if (!VT.isVector()) {
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ SDValue MergedValues[2] = {
+ Result,
+ Chain
+ };
+ return DAG.getMergeValues(MergedValues, 2, DL);
+ }
+
+ return SDValue();
+}
SDValue R600TargetLowering::LowerFPOW(SDValue Op,
SelectionDAG &DAG) const {
}
break;
}
+  // An EXTRACT_VECTOR_ELT of a BUILD_VECTOR generated by our custom lowering
+  // also needs to be combined here.
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Arg = N->getOperand(0);
+ if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned Element = Const->getZExtValue();
+ return Arg->getOperand(Element);
+ }
+ }
+ }
}
return SDValue();
}
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
bool isZero(SDValue Op) const;
};
.addReg(Src0Reg) // $src0
.addImm(0) // $src0_neg
.addImm(0) // $src0_rel
- .addImm(0); // $src0_abs
+ .addImm(0) // $src0_abs
+ .addImm(-1); // $src0_sel
if (Src1Reg) {
MIB.addReg(Src1Reg) // $src1
.addImm(0) // $src1_neg
.addImm(0) // $src1_rel
- .addImm(0); // $src1_abs
+ .addImm(0) // $src1_abs
+ .addImm(-1); // $src1_sel
}
//XXX: The r600g finalizer expects this to be 1, once we've moved the
int R600InstrInfo::getOperandIdx(unsigned Opcode,
R600Operands::Ops Op) const {
- const static int OpTable[3][R600Operands::COUNT] = {
-// W C S S S S S S S S
-// R O D L S R R R S R R R S R R L P
-// D U I M R A R C C C C C C C R C C A R I
-// S E U T O E M C 0 0 0 C 1 1 1 C 2 2 S E M
-// T M P E D L P 0 N R A 1 N R A 2 N R T D M
- {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11},
- {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17},
- {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14}
- };
unsigned TargetFlags = get(Opcode).TSFlags;
unsigned OpTableIdx;
OpTableIdx = 2;
}
- return OpTable[OpTableIdx][Op];
+ return R600Operands::ALUOpTable[OpTableIdx][Op];
}
void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
let PrintMethod = PM;
}
+// Source select for ALU src operands; see also the ALU_CONST and ALU_PARAM
+// registers.
+def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
+ let PrintMethod = "printSel";
+}
+
def LITERAL : InstFlag<"printLiteral">;
def WRITE : InstFlag <"printWrite", 1>;
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
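+// Constant buffer addressing: a compile-time constant offset can be folded
+// into SRCx_SEL (or matched by CONST_COPY); a variable offset is fetched
+// through TEX_VTX_CONSTBUF.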
+def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
+def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
class R600ALU_Word0 {
field bits<32> Word0;
InstR600 <0,
(outs R600_Reg32:$dst),
(ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
- R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
!strconcat(opName,
"$clamp $dst$write$dst_rel$omod, "
- "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+ "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
"$literal $pred_sel$last"),
pattern,
itin>,
(outs R600_Reg32:$dst),
(ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
- R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
- R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
!strconcat(opName,
"$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
- "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
- "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
+ "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
+ "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, "
"$literal $pred_sel$last"),
pattern,
itin>,
InstR600 <0,
(outs R600_Reg32:$dst),
(ins REL:$dst_rel, CLAMP:$clamp,
- R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
- R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel,
- R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
+ R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
!strconcat(opName, "$clamp $dst$dst_rel, "
- "$src0_neg$src0$src0_rel, "
- "$src1_neg$src1$src1_rel, "
- "$src2_neg$src2$src2_rel, "
+ "$src0_neg$src0$src0_sel$src0_rel, "
+ "$src1_neg$src1$src1_sel$src1_rel, "
+ "$src2_neg$src2$src2_sel$src2_rel, "
"$literal $pred_sel$last"),
pattern,
itin>,
>;
def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
- SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>,
+ SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
[SDNPMayLoad]
>;
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
-def R600_LOAD_CONST : AMDGPUShaderInst <
- (outs R600_Reg32:$dst),
- (ins i32imm:$src0),
- "R600_LOAD_CONST $dst, $src0",
- [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
->;
def RESERVE_REG : AMDGPUShaderInst <
(outs),
"RESERVE_REG $src",
[(int_AMDGPU_reserve_reg imm:$src)]
>;
-
def TXD: AMDGPUShaderInst <
(outs R600_Reg128:$dst),
(ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
"RETURN", [(IL_retflag)]>;
}
+
+//===----------------------------------------------------------------------===//
+// Constant Buffer Addressing Support
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+def CONST_COPY : Instruction {
+ let OutOperandList = (outs R600_Reg32:$dst);
+ let InOperandList = (ins i32imm:$src);
+ let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
+ let AsmString = "CONST_COPY";
+ let neverHasSideEffects = 1;
+ let isAsCheapAsAMove = 1;
+ let Itinerary = NullALU;
+}
+} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
+
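+// Fetches a 128-bit constant buffer entry whose index is not known at compile
+// time (see ADDRGA_VAR_OFFSET) using a VTX_READ-style fetch.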
+def TEX_VTX_CONSTBUF :
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr",
+ [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>,
+ VTX_WORD1_GPR, VTX_WORD0 {
+
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let BUFFER_ID = 0;
+ let SRC_REL = 0;
+ let SRC_SEL_X = 0;
+ let DST_REL = 0;
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 2;
+ let FORMAT_COMP_ALL = 1;
+ let SRF_MODE_ALL = 1;
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 35;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600MCCodeEmitter.
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding is
+// done in R600MCCodeEmitter)
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-86} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
//===--------------------------------------------------------------------===//
// Instructions support
//===--------------------------------------------------------------------===//
--- /dev/null
+//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass handles the remaining CONST_COPY pseudo MachineInstrs. ISel
+/// folds each constant buffer read into a scalar ALU instruction, but it
+/// cannot fold one into a vector instruction such as DOT4 or CUBE; for those,
+/// ISel emits a CONST_COPY instead. This pass (executed after
+/// ExpandSpecialInstrs) tries to fold the copies where possible and replaces
+/// them with MOV otherwise.
+/// TODO: Implement the folding part, using a copy propagation algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/GlobalValue.h"
+
+namespace llvm {
+
+class R600LowerConstCopy : public MachineFunctionPass {
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+public:
+ R600LowerConstCopy(TargetMachine &tm);
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  const char *getPassName() const { return "R600 Lower Const Copy"; }
+};
+
+char R600LowerConstCopy::ID = 0;
+
+R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) :
+    MachineFunctionPass(ID),
+    TII(static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
+}
+
+bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E;) {
+ MachineInstr &MI = *I;
+ I = llvm::next(I);
+ if (MI.getOpcode() != AMDGPU::CONST_COPY)
+ continue;
+ MachineInstr *NewMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV,
+ MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
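+      // Operand 9 of the default MOV is $src0_sel (see the OP1 operand list);
+      // forward the const select from the CONST_COPY source operand.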
+ NewMI->getOperand(9).setImm(MI.getOperand(1).getImm());
+      MI.eraseFromParent();
+      Changed = true;
+ }
+ }
+  return Changed;
+}
+
+FunctionPass *createR600LowerConstCopy(TargetMachine &tm) {
+ return new R600LowerConstCopy(tm);
+}
+
+} // End namespace llvm
Reserved.set(AMDGPU::NEG_ONE);
Reserved.set(AMDGPU::PV_X);
Reserved.set(AMDGPU::ALU_LITERAL_X);
+ Reserved.set(AMDGPU::ALU_CONST);
Reserved.set(AMDGPU::PREDICATE_BIT);
Reserved.set(AMDGPU::PRED_SEL_OFF);
Reserved.set(AMDGPU::PRED_SEL_ZERO);
Reserved.set(AMDGPU::PRED_SEL_ONE);
- for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
- E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
- Reserved.set(*I);
- }
-
for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
E = MFI->ReservedRegs.end(); I != E; ++I) {
Reserved.set(*I);
foreach Chan = [ "X", "Y", "Z", "W" ] in {
// 32-bit Temporary Registers
def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
-
- // 32-bit Constant Registers (There are more than 128, this the number
- // that is currently supported.
- def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>;
}
// 128-bit Temporary Registers
def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "ArrayBase%u", 448, 464))>;
-
-def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (interleave
- (interleave (sequence "C%u_X", 0, 127),
- (sequence "C%u_Z", 0, 127)),
- (interleave (sequence "C%u_Y", 0, 127),
- (sequence "C%u_W", 0, 127))))>;
+// Special registers for ALU src operands
+// Constant buffer reference; SRCx_SEL contains the index
+def ALU_CONST : R600Reg<"CBuf", 0>;
+// Interpolation parameter reference; SRCx_SEL contains the index
+def ALU_PARAM : R600Reg<"Param", 0>;
def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "T%u_X", 0, 127))>;
(add (sequence "T%u_W", 0, 127))>;
def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (interleave
- (interleave R600_TReg32_X, R600_TReg32_Z),
- (interleave R600_TReg32_Y, R600_TReg32_W)))>;
+ (interleave R600_TReg32_X, R600_TReg32_Y,
+ R600_TReg32_Z, R600_TReg32_W)>;
def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
R600_TReg32,
- R600_CReg32,
R600_ArrayBase,
- ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
+ ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
+ ALU_CONST, ALU_PARAM
+ )>;
def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;