From 82a5d0c64142990236b40567561b6e99b7158216 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 20 Aug 2012 21:09:00 +0000 Subject: [PATCH] radeon/llvm: Add R600ExpandSpecialInstrs pass This pass expands reduction instructions into a MachineInstrBundle that contains 4 instructions, one for each instruction slot. --- src/gallium/drivers/radeon/AMDGPU.h | 1 + src/gallium/drivers/radeon/AMDGPUInstrInfo.h | 1 + src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp | 6 ++ src/gallium/drivers/radeon/Makefile.sources | 1 + src/gallium/drivers/radeon/R600CodeEmitter.cpp | 26 +++---- .../drivers/radeon/R600ExpandSpecialInstrs.cpp | 91 ++++++++++++++++++++++ 6 files changed, 112 insertions(+), 14 deletions(-) create mode 100644 src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp diff --git a/src/gallium/drivers/radeon/AMDGPU.h b/src/gallium/drivers/radeon/AMDGPU.h index 191f495..927e62a 100644 --- a/src/gallium/drivers/radeon/AMDGPU.h +++ b/src/gallium/drivers/radeon/AMDGPU.h @@ -22,6 +22,7 @@ class AMDGPUTargetMachine; // R600 Passes FunctionPass* createR600KernelParametersPass(const TargetData* TD); FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS); +FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); // SI Passes FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h index 31400a7..de3c594 100644 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h +++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h @@ -30,6 +30,7 @@ #define MO_FLAG_ABS (1 << 2) #define MO_FLAG_MASK (1 << 3) #define MO_FLAG_PUSH (1 << 4) +#define MO_FLAG_LAST (1 << 5) #define OPCODE_IS_ZERO_INT 0x00000045 #define OPCODE_IS_NOT_ZERO_INT 0x00000042 diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp index b97c0fe..6f15430 100644 --- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp +++ 
b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp @@ -158,6 +158,12 @@ bool AMDGPUPassConfig::addPreEmitPass() { PM->add(createAMDGPUCFGPreparationPass(*TM)); PM->add(createAMDGPUCFGStructurizerPass(*TM)); + const AMDGPUSubtarget &ST = TM->getSubtarget(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { + PM->add(createR600ExpandSpecialInstrsPass(*TM)); + addPass(FinalizeMachineBundlesID); + } + return false; } diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index 3a75ce9..0e9825f 100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -35,6 +35,7 @@ CPP_SOURCES := \ AMDGPUInstrInfo.cpp \ AMDGPURegisterInfo.cpp \ R600CodeEmitter.cpp \ + R600ExpandSpecialInstrs.cpp \ R600ISelLowering.cpp \ R600InstrInfo.cpp \ R600KernelParameters.cpp \ diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp index 02b6fdb..14e877b 100644 --- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp +++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp @@ -50,7 +50,6 @@ private: const R600InstrInfo * TII; bool IsCube; - bool IsReduction; bool IsVector; unsigned currentElement; bool IsLast; @@ -60,7 +59,7 @@ private: public: R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID), - _OS(OS), TM(NULL), IsCube(false), IsReduction(false), IsVector(false), + _OS(OS), TM(NULL), IsCube(false), IsVector(false), IsLast(true) { } const char *getPassName() const { return "AMDGPU Machine Code Emitter"; } @@ -159,10 +158,9 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); BB != BB_E; ++BB) { MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { + for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(), + E = MBB.instr_end(); I != E; ++I) { MachineInstr &MI = *I; - 
IsReduction = TII->isReductionOp(MI.getOpcode()); IsVector = TII->isVector(MI); IsCube = TII->isCubeOp(MI.getOpcode()); if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) { @@ -172,7 +170,7 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { EmitTexInstr(MI); } else if (TII->isFCOp(MI.getOpcode())){ EmitFCInstr(MI); - } else if (IsReduction || IsVector || IsCube) { + } else if (IsVector || IsCube) { IsLast = false; // XXX: On Cayman, some (all?) of the vector instructions only need // to fill the first three slots. @@ -180,7 +178,6 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { IsLast = (currentElement == 3); EmitALUInstr(MI); } - IsReduction = false; IsVector = false; IsCube = false; } else if (MI.getOpcode() == AMDGPU::RETURN || @@ -310,8 +307,6 @@ void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override) // Emit the source channel (1 byte) if (chan_override != -1) { EmitByte(chan_override); - } else if (IsReduction) { - EmitByte(currentElement); } else if (MO.isReg()) { EmitByte(TRI->getHWRegChan(MO.getReg())); } else { @@ -353,7 +348,7 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO) EmitByte(getHWReg(MO.getReg())); // Emit the element of the destination register (1 byte) - if (IsReduction || IsCube || IsVector) { + if (IsCube || IsVector) { EmitByte(currentElement); } else { EmitByte(TRI->getHWRegChan(MO.getReg())); @@ -367,7 +362,7 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO) } // Emit writemask (1 byte). 
- if (((IsReduction || IsVector) && + if ((IsVector && currentElement != TRI->getHWRegChan(MO.getReg())) || MO.getTargetFlags() & MO_FLAG_MASK) { EmitByte(0); @@ -389,11 +384,14 @@ void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc) EmitTwoBytes(getBinaryCodeForInstr(MI)); // Emit IsLast (for this instruction group) (1 byte) - if (IsLast) { - EmitByte(1); - } else { + if (!IsLast || + (MI.isInsideBundle() && + !(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST))) { EmitByte(0); + } else { + EmitByte(1); } + // Emit isOp3 (1 byte) if (numSrc == 3) { EmitByte(1); diff --git a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp new file mode 100644 index 0000000..4c67ba4 --- /dev/null +++ b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp @@ -0,0 +1,91 @@ +//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Vector, Reduction, and Cube instructions need to fill the entire instruction +// group to work correctly. This pass expands these individual instructions +// into several instructions that will completely fill the instruction group. 
+//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "R600InstrInfo.h" +#include "R600RegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +namespace { + +class R600ExpandSpecialInstrsPass : public MachineFunctionPass { + +private: + static char ID; + const R600InstrInfo *TII; + +public: + R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), + TII (static_cast(tm.getInstrInfo())) { } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { + return "R600 Expand special instructions pass"; + } +}; + +} // End anonymous namespace + +char R600ExpandSpecialInstrsPass::ID = 0; + +FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { + return new R600ExpandSpecialInstrsPass(TM); +} + +bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { + + const R600RegisterInfo &TRI = TII->getRegisterInfo(); + + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + MachineBasicBlock::iterator I = MBB.begin(); + while (I != MBB.end()) { + MachineInstr &MI = *I; + I = llvm::next(I); + + if (!TII->isReductionOp(MI.getOpcode())) { + continue; + } + + // Expand the instruction + for (unsigned Chan = 0; Chan < 4; Chan++) { + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned Src0 = MI.getOperand(1).getReg(); + unsigned Src1 = MI.getOperand(2).getReg(); + unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); + unsigned NewSrc0 = TRI.getSubReg(Src0, SubRegIndex); + unsigned NewSrc1 = TRI.getSubReg(Src1, SubRegIndex); + unsigned DstBase = TRI.getHWRegIndex(DstReg); + unsigned NewDstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); + unsigned Flags = (Chan != TRI.getHWRegChan(DstReg) ? 
MO_FLAG_MASK : 0); + Flags |= (Chan == 3 ? MO_FLAG_LAST : 0); + MachineOperand NewDstOp = MachineOperand::CreateReg(NewDstReg, true); + NewDstOp.addTargetFlag(Flags); + + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MI.getOpcode())) + .addOperand(NewDstOp) + .addReg(NewSrc0) + .addReg(NewSrc1) + ->setIsInsideBundle(Chan != 0); + } + MI.eraseFromParent(); + } + } + return false; +} -- 2.7.4