From 9c46cb23685d0b28d5b9124f6dd26f27d028ed30 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 6 Jun 2012 11:35:48 -0400 Subject: [PATCH] radeon/llvm: Fix MULLO* instructions on Cayman On Cayman, the MULLO* instructions must fill all slots in an instruction group. --- src/gallium/drivers/radeon/R600CodeEmitter.cpp | 18 ++++++++----- src/gallium/drivers/radeon/R600InstrInfo.cpp | 5 ++++ src/gallium/drivers/radeon/R600InstrInfo.h | 7 ++++- src/gallium/drivers/radeon/R600Instructions.td | 37 +++++++++++++++++++++----- 4 files changed, 53 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp index f75c2f5..42d7918 100644 --- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp +++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp @@ -21,6 +21,7 @@ #include "AMDILCodeEmitter.h" #include "AMDILInstrInfo.h" #include "AMDILUtilityFunctions.h" +#include "R600InstrInfo.h" #include "R600RegisterInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -50,6 +51,7 @@ private: bool isCube; bool isReduction; + bool isVector; unsigned currentElement; bool isLast; @@ -58,7 +60,7 @@ private: public: R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID), - _OS(OS), TM(NULL), isCube(false), isReduction(false), + _OS(OS), TM(NULL), isCube(false), isReduction(false), isVector(false), isLast(true) { } const char *getPassName() const { return "AMDGPU Machine Code Emitter"; } @@ -144,6 +146,7 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { TM = &MF.getTarget(); MRI = &MF.getRegInfo(); TRI = static_cast(TM->getRegisterInfo()); + const R600InstrInfo * TII = static_cast(TM->getInstrInfo()); const AMDILSubtarget &STM = TM->getSubtarget(); std::string gpu = STM.getDeviceName(); @@ -157,6 +160,8 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { MachineInstr &MI = *I; + isReduction = AMDGPU::isReductionOp(MI.getOpcode()); + isVector = TII->isVector(MI); if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) { continue; } @@ -164,14 +169,14 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { emitTexInstr(MI); } else if (AMDGPU::isFCOp(MI.getOpcode())){ emitFCInstr(MI); - } else if (AMDGPU::isReductionOp(MI.getOpcode())) { - isReduction = true; + } else if (isReduction || isVector) { isLast = false; for (currentElement = 0; currentElement < 4; currentElement++) { isLast = (currentElement == 3); emitALUInstr(MI); } isReduction = false; + isVector = false; } else if (AMDGPU::isCubeOp(MI.getOpcode())) { isCube = true; isLast = false; @@ -389,7 +394,7 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO) emitByte(getHWReg(MO.getReg())); // Emit the element of the destination register (1 byte) - if (isReduction || isCube) { + if (isReduction || isCube || isVector) { emitByte(currentElement); } else { emitByte(TRI->getHWRegChan(MO.getReg())); @@ -403,8 +408,9 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO) } // Emit writemask (1 byte). - if ((isReduction && currentElement != TRI->getHWRegChan(MO.getReg())) - || MO.getTargetFlags() & MO_FLAG_MASK) { + if (((isReduction || isVector) && + currentElement != TRI->getHWRegChan(MO.getReg())) + || MO.getTargetFlags() & MO_FLAG_MASK) { emitByte(0); } else { emitByte(1); diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp index 3d65e73..d1246d3 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.cpp +++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp @@ -33,6 +33,11 @@ bool R600InstrInfo::isTrig(const MachineInstr &MI) const return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG; } +bool R600InstrInfo::isVector(const MachineInstr &MI) const +{ + return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; +} + void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h index a7a65d5..f2a1098 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.h +++ b/src/gallium/drivers/radeon/R600InstrInfo.h @@ -43,6 +43,10 @@ namespace llvm { bool isTrig(const MachineInstr &MI) const; + /// isVector - Vector instructions are instructions that must fill all + /// instruction slots within an instruction group. + bool isVector(const MachineInstr &MI) const; + virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg, int64_t Imm) const; @@ -59,7 +63,8 @@ namespace R600_InstFlag { REDUCTION = (1 << 2), FC = (1 << 3), TRIG = (1 << 4), - OP3 = (1 << 5) + OP3 = (1 << 5), + VECTOR = (1 << 6) }; } diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index 12900fb..617961a 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -18,8 +18,9 @@ class InstR600 inst, dag outs, dag ins, string asm, list pattern, : AMDGPUInst { field bits<32> Inst; - bit Trig = 0; + bit Trig = 0; bit Op3 = 0; + bit isVector = 0; let Inst = inst; let Namespace = "AMDIL"; @@ -31,6 +32,10 @@ class InstR600 inst, dag outs, dag ins, string asm, list pattern, let TSFlags{4} = Trig; let TSFlags{5} = Op3; + + // Vector instructions are instructions that must fill all slots in an + // instruction group + let TSFlags{6} = isVector; } class InstR600ISA pattern> : @@ -784,6 +789,19 @@ class TRIG_HELPER_r700 : Pat < >; */ +//===----------------------------------------------------------------------===// +// Evergreen Only instructions +//===----------------------------------------------------------------------===// + +let Predicates = [isEG] in { + +def MULLO_INT_eg : MULLO_INT_Common<0x8F>; +def MULHI_INT_eg : MULHI_INT_Common<0x90>; +def MULLO_UINT_eg : MULLO_UINT_Common<0x91>; +def MULHI_UINT_eg : MULHI_UINT_Common<0x92>; + +} // End Predicates = [isEG] + /* ------------------------------- */ /* Evergreen / Cayman Instructions */ /* ------------------------------- */ @@ -812,10 +830,6 @@ class TRIG_eg : Pat< def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; - def MULLO_INT_eg : MULLO_INT_Common<0x8F>; - def MULHI_INT_eg : MULHI_INT_Common<0x90>; - def MULLO_UINT_eg : MULLO_UINT_Common<0x91>; - def MULHI_UINT_eg : MULHI_UINT_Common<0x92>; def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; def DOT4_eg : DOT4_Common<0xBE>; def CUBE_eg : CUBE_Common<0xC0>; @@ -893,8 +907,17 @@ def VTX_READ_GLOBAL_eg : VTX_READ_eg <1, let Predicates = [isCayman] in { - /* XXX: I'm not sure if this opcode is correct. */ - def RECIP_UINT_cm : RECIP_UINT_Common<0x77>; +let isVector = 1 in { + +def MULLO_INT_cm : MULLO_INT_Common<0x8F>; +def MULHI_INT_cm : MULHI_INT_Common<0x90>; +def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; +def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; + +} // End isVector = 1 + +/* XXX: I'm not sure if this opcode is correct. */ +def RECIP_UINT_cm : RECIP_UINT_Common<0x77>; } // End isCayman -- 2.7.4