From 8263408a91b6b3beb5af5de6bdc7e5d13197a268 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Wed, 1 Aug 2012 22:49:40 +0200 Subject: [PATCH] radeon/llvm: Support for predicate bit Tom Stellard: - A few changes to predicate register defs Signed-off-by: Tom Stellard --- src/gallium/drivers/radeon/AMDGPUInstrInfo.h | 6 ++++ src/gallium/drivers/radeon/R600CodeEmitter.cpp | 34 +++++++++++++++++++++-- src/gallium/drivers/radeon/R600GenRegisterInfo.pl | 10 +++++++ src/gallium/drivers/radeon/R600ISelLowering.cpp | 13 ++++++--- src/gallium/drivers/radeon/R600InstrInfo.cpp | 30 +++++++++++++++++++- src/gallium/drivers/radeon/R600InstrInfo.h | 3 ++ src/gallium/drivers/radeon/R600Instructions.td | 29 +++++++++++++++---- src/gallium/drivers/radeon/R600RegisterInfo.cpp | 13 +++++++++ 8 files changed, 125 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h index 7232c0b..28952cf 100644 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h +++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h @@ -29,6 +29,12 @@ #define MO_FLAG_NEG (1 << 1) #define MO_FLAG_ABS (1 << 2) #define MO_FLAG_MASK (1 << 3) +#define MO_FLAG_PUSH (1 << 4) + +#define OPCODE_IS_ZERO_INT 0x00000045 +#define OPCODE_IS_NOT_ZERO_INT 0x00000042 +#define OPCODE_IS_ZERO 0x00000020 +#define OPCODE_IS_NOT_ZERO 0x00000023 namespace llvm { diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp index 870d375..02b6fdb 100644 --- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp +++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp @@ -235,6 +235,8 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI) { unsigned numOperands = MI.getNumExplicitOperands(); + if(MI.findFirstPredOperandIdx() > -1) + numOperands--; // Some instructions are just place holder instructions that represent // operations that the GPU does automatically. They should be ignored. @@ -242,6 +244,9 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI) return; } + if(MI.getOpcode() == AMDGPU::PRED_X) + numOperands = 2; + // XXX Check if instruction writes a result if (numOperands < 1) { return; @@ -343,7 +348,7 @@ void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override) void R600CodeEmitter::EmitDst(const MachineOperand & MO) { - if (MO.isReg()) { + if (MO.isReg() && MO.getReg() != AMDGPU::PREDICATE_BIT) { // Emit the destination register index (1 byte) EmitByte(getHWReg(MO.getReg())); @@ -396,8 +401,31 @@ void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc) EmitByte(0); } - // XXX: Emit predicate (1 byte) - EmitByte(0); + // XXX: Emit push modifier + if(MI.getOperand(1).getTargetFlags() & MO_FLAG_PUSH) { + EmitByte(1); + } else { + EmitByte(0); + } + + // XXX: Emit predicate (1 byte) + int predidx = MI.findFirstPredOperandIdx(); + if (predidx > -1) + switch(MI.getOperand(predidx).getReg()) { + case AMDGPU::PRED_SEL_ZERO: + EmitByte(2); + break; + case AMDGPU::PRED_SEL_ONE: + EmitByte(3); + break; + default: + EmitByte(0); + break; + } + else { + EmitByte(0); + } + // XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like // r600_asm.c sets it. diff --git a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl index 6bbe21c..a28a3ad 100644 --- a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl +++ b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl @@ -69,6 +69,10 @@ def NEG_HALF : R600Reg<"-0.5">; def NEG_ONE : R600Reg<"-1.0">; def PV_X : R600Reg<"pv.x">; def ALU_LITERAL_X : R600Reg<"literal.x">; +def PREDICATE_BIT : R600Reg<"PredicateBit">; +def PRED_SEL_OFF: R600Reg<"Pred_sel_off">; +def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero">; +def PRED_SEL_ONE : R600Reg<"Pred_sel_one">; def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add $creg_list)>; @@ -84,6 +88,12 @@ def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add R600_CReg32, ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>; +def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add + PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>; + +def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add + PREDICATE_BIT)>; + def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add $t128_string)> { diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp index f33d90e..26f14fa 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp @@ -90,21 +90,24 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)); + .addOperand(MI->getOperand(1)) + .addReg(AMDGPU::PRED_SEL_OFF); break; case AMDGPU::FABS_R600: MI->getOperand(1).addTargetFlag(MO_FLAG_ABS); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)); + .addOperand(MI->getOperand(1)) + .addReg(AMDGPU::PRED_SEL_OFF); break; case AMDGPU::FNEG_R600: MI->getOperand(1).addTargetFlag(MO_FLAG_NEG); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)); + .addOperand(MI->getOperand(1)) + .addReg(AMDGPU::PRED_SEL_OFF); break; case AMDGPU::R600_LOAD_CONST: @@ -141,10 +144,12 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( // this way and it didn't produce the correct results. BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue) .addReg(AMDGPU::ALU_LITERAL_X) + .addReg(AMDGPU::PRED_SEL_OFF) .addImm(2); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr) .addOperand(MI->getOperand(1)) - .addReg(ShiftValue); + .addReg(ShiftValue) + .addReg(AMDGPU::PRED_SEL_OFF); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) .addOperand(MI->getOperand(0)) .addReg(NewAddr); diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp index 3c9e4eb..c807d5c 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.cpp +++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp @@ -16,6 +16,7 @@ #include "AMDGPUSubtarget.h" #include "R600RegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "AMDILUtilityFunctions.h" #define GET_INSTRINFO_CTOR #include "AMDGPUGenDFAPacketizer.inc" @@ -59,6 +60,7 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(AMDGPU::MOV)) .addReg(RI.getSubReg(DestReg, subRegMap[i]), RegState::Define) .addReg(RI.getSubReg(SrcReg, subRegMap[i])) + .addReg(0) // PREDICATE_BIT .addReg(DestReg, RegState::Define | RegState::Implicit); } } else { @@ -68,7 +70,8 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, && !AMDGPU::R600_Reg128RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(0); // PREDICATE_BIT } } @@ -79,6 +82,7 @@ MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF, MachineInstrBuilder(MI).addReg(DstReg, RegState::Define); MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X); MachineInstrBuilder(MI).addImm(Imm); + MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT return MI; } @@ -183,3 +187,27 @@ DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, const InstrItineraryData *II = TM->getInstrItineraryData(); return TM->getSubtarget().createDFAPacketizer(II); } + +bool +R600InstrInfo::isPredicated(const MachineInstr *MI) const +{ + int idx = MI->findFirstPredOperandIdx(); + if (idx < 0) + return false; + + MI->dump(); + unsigned Reg = MI->getOperand(idx).getReg(); + switch (Reg) { + default: return false; + case AMDGPU::PRED_SEL_ONE: + case AMDGPU::PRED_SEL_ZERO: + case AMDGPU::PREDICATE_BIT: + return true; + } +} + +bool +R600InstrInfo::isPredicable(MachineInstr *MI) const +{ + return AMDGPUInstrInfo::isPredicable(MI); +} diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h index 72ea151..9bdda7a 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.h +++ b/src/gallium/drivers/radeon/R600InstrInfo.h @@ -62,6 +62,9 @@ namespace llvm { DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM, const ScheduleDAG *DAG) const; + bool isPredicated(const MachineInstr *MI) const; + + bool isPredicable(MachineInstr *MI) const; }; } // End llvm namespace diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index 45598a6..6f2ab1f 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -69,13 +69,16 @@ class R600_ALU { } +def R600_Pred : PredicateOperand; + class R600_1OP inst, string opName, list pattern, InstrItinClass itin = AnyALU> : InstR600 ; @@ -84,7 +87,7 @@ class R600_2OP inst, string opName, list pattern, InstrItinClass itin = AnyALU> : InstR600 inst, string opName, list pattern, InstrItinClass itin = AnyALU> : InstR600 { @@ -102,6 +105,22 @@ class R600_3OP inst, string opName, list pattern, let Op3 = 1; } + + +def PRED_X : AMDGPUInst <(outs R600_Predicate_Bit:$dst), + (ins R600_Reg32:$src0, i32imm:$src1), + "PRED $dst, $src0, $src1", + []> +{ + let DisableEncoding = "$src0"; + field bits<32> Inst; + bits<32> src1; + + let Inst = src1; +} + + + class R600_REDUCTION inst, dag ins, string asm, list pattern, InstrItinClass itin = VecALU> : InstR600 ; class MOV_IMM : InstR600 <0x19, (outs R600_Reg32:$dst), - (ins R600_Reg32:$alu_literal, immType:$imm), + (ins R600_Reg32:$alu_literal, R600_Pred:$p, immType:$imm), "MOV_IMM $dst, $imm", [], AnyALU >; diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.cpp b/src/gallium/drivers/radeon/R600RegisterInfo.cpp index 86bc169..9475241 100644 --- a/src/gallium/drivers/radeon/R600RegisterInfo.cpp +++ b/src/gallium/drivers/radeon/R600RegisterInfo.cpp @@ -37,6 +37,10 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const Reserved.set(AMDGPU::NEG_ONE); Reserved.set(AMDGPU::PV_X); Reserved.set(AMDGPU::ALU_LITERAL_X); + Reserved.set(AMDGPU::PREDICATE_BIT); + Reserved.set(AMDGPU::PRED_SEL_OFF); + Reserved.set(AMDGPU::PRED_SEL_ZERO); + Reserved.set(AMDGPU::PRED_SEL_ONE); for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(), E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) { @@ -72,6 +76,11 @@ unsigned R600RegisterInfo::getHWRegIndex(unsigned reg) const case AMDGPU::HALF: case AMDGPU::NEG_HALF: return 252; case AMDGPU::ALU_LITERAL_X: return 253; + case AMDGPU::PREDICATE_BIT: + case AMDGPU::PRED_SEL_OFF: + case AMDGPU::PRED_SEL_ZERO: + case AMDGPU::PRED_SEL_ONE: + return 0; default: return getHWRegIndexGen(reg); } } @@ -86,6 +95,10 @@ unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const case AMDGPU::HALF: case AMDGPU::NEG_HALF: case AMDGPU::ALU_LITERAL_X: + case AMDGPU::PREDICATE_BIT: + case AMDGPU::PRED_SEL_OFF: + case AMDGPU::PRED_SEL_ZERO: + case AMDGPU::PRED_SEL_ONE: return 0; default: return getHWRegChanGen(reg); } -- 2.7.4