namespace gbe
{
+ ///////////////////////////////////////////////////////////////////////////
+ // Various helper functions
+ ///////////////////////////////////////////////////////////////////////////
+
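+ // Translate an IR type into its Gen hardware encoding (booleans are stored
+ // as 16-bit words)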
+ INLINE uint32_t getGenType(ir::Type type) {
+ using namespace ir;
+ switch (type) {
+ case TYPE_BOOL: return GEN_TYPE_UW;
+ case TYPE_S8: return GEN_TYPE_B;
+ case TYPE_U8: return GEN_TYPE_UB;
+ case TYPE_S16: return GEN_TYPE_W;
+ case TYPE_U16: return GEN_TYPE_UW;
+ case TYPE_S32: return GEN_TYPE_D;
+ case TYPE_U32: return GEN_TYPE_UD;
+ case TYPE_FLOAT: return GEN_TYPE_F;
+ default: NOT_SUPPORTED; return GEN_TYPE_F;
+ }
+ }
+
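+ // Translate an IR comparison opcode into the matching Gen conditional
+ // modifier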
+ INLINE uint32_t getGenCompare(ir::Opcode opcode) {
+ using namespace ir;
+ switch (opcode) {
+ case OP_LE: return GEN_CONDITIONAL_LE;
+ case OP_LT: return GEN_CONDITIONAL_L;
+ case OP_GE: return GEN_CONDITIONAL_GE;
+ case OP_GT: return GEN_CONDITIONAL_G;
+ case OP_EQ: return GEN_CONDITIONAL_EQ;
+ case OP_NE: return GEN_CONDITIONAL_NEQ;
+ default: NOT_SUPPORTED; return 0u;
+ };
+ }
+
+ ///////////////////////////////////////////////////////////////////////////
+ // GenContext implementation
+ ///////////////////////////////////////////////////////////////////////////
+
GenContext::GenContext(const ir::Unit &unit, const std::string &name) :
Context(unit, name)
{
GenContext::~GenContext(void) { GBE_DELETE(p); }
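+ // Get the Gen register allocated for the given IR register and retype it
+ // according to the given IR type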
+ GenReg GenContext::genReg(ir::Register reg, ir::Type type) {
+ const uint32_t genType = getGenType(type);
+ auto it = RA.find(reg);
+ GBE_ASSERT(it != RA.end());
+ return GenReg::retype(it->second, genType);
+ }
+
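+ // Get the register that addresses the given quarter (2, 3 or 4) of a SIMD16
+ // virtual register. Scalar registers are not strided per lane, so they are
+ // returned unchanged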
+ GenReg GenContext::genRegQn(ir::Register reg, uint32_t quarter, ir::Type type) {
+ GBE_ASSERT(quarter == 2 || quarter == 3 || quarter == 4);
+ GenReg genReg = this->genReg(reg, type);
+ if (this->isScalarReg(reg) == true)
+ return genReg;
+ else {
+ const uint32_t elemSz = typeSize(genReg.type);
+ const uint32_t grfOffset = genReg.nr*GEN_REG_SIZE + elemSz*genReg.subnr;
+ const uint32_t nextOffset = grfOffset + 8*(quarter-1)*elemSz;
+ genReg.nr = nextOffset / GEN_REG_SIZE;
+ genReg.subnr = (nextOffset % GEN_REG_SIZE) / elemSz;
+ return genReg;
+ }
+ }
+
// Per-lane block IPs are always pre-allocated and used for branches. We just
// use 0xffff as a fake register for them
static const ir::Register blockIPReg(0xffff);
if (fn.isSpecialReg(reg) == true) continue; // already done
if (fn.getInput(reg) != NULL) continue; // already done
const RegisterData regData = fn.getRegisterData(reg);
- const RegisterFamily family = regData.family;
const uint32_t typeSize = regData.getSize();
- const uint32_t nr = grfOffset / GEN_REG_SIZE;
- const uint32_t subnr = (grfOffset % GEN_REG_SIZE) / typeSize;
- GBE_ASSERT(family == FAMILY_DWORD); // XXX Do the rest
- GBE_ASSERT(grfOffset + simdWidth*typeSize < GEN_GRF_SIZE);
- RA.insert(std::make_pair(reg, GenReg::f16grf(nr, subnr)));
- grfOffset += simdWidth * typeSize;
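+ // Align each allocation on its own size (SIMD width x element size) before
+ // carving it from the GRF space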
+ const uint32_t regSize = simdWidth*typeSize;
+ grfOffset = ALIGN(grfOffset, regSize);
+ if (grfOffset + regSize <= GEN_GRF_SIZE) {
+ const uint32_t nr = grfOffset / GEN_REG_SIZE;
+ const uint32_t subnr = (grfOffset % GEN_REG_SIZE) / typeSize;
+ if (simdWidth == 16)
+ RA.insert(std::make_pair(reg, GenReg::f16grf(nr, subnr)));
+ else if (simdWidth == 8)
+ RA.insert(std::make_pair(reg, GenReg::f8grf(nr, subnr)));
+ else
+ NOT_SUPPORTED;
+ grfOffset += simdWidth * typeSize;
+ } else
+ NOT_IMPLEMENTED;
}
}
void GenContext::emitUnaryInstruction(const ir::UnaryInstruction &insn) {
GBE_ASSERT(insn.getOpcode() == ir::OP_MOV);
- p->MOV(reg(insn.getDst(0)), reg(insn.getSrc(0)));
+ p->MOV(genReg(insn.getDst(0)), genReg(insn.getSrc(0)));
}
void GenContext::emitIntMul32x32(const ir::Instruction &insn,
GenReg dst, GenReg src0, GenReg src1)
{
-
+ using namespace ir;
const uint32_t width = p->curr.execWidth;
- const bool src0Scalar = isScalarReg(insn.getSrc(0));
- const bool src1Scalar = isScalarReg(insn.getSrc(1));
-
p->push();
- // Either left part of the 16-wide register or just a simd 8 register
- dst = GenReg::retype(dst, GEN_TYPE_D);
- src1 = GenReg::retype(src1, GEN_TYPE_D);
- src1 = GenReg::retype(src1, GEN_TYPE_D);
- p->curr.execWidth = 8;
- p->curr.quarterControl = GEN_COMPRESSION_Q1;
- p->MUL(GenReg::retype(GenReg::acc(), GEN_TYPE_D), src0, src1);
- p->MACH(GenReg::retype(GenReg::null(), GEN_TYPE_D), src0, src1);
- p->MOV(GenReg::retype(dst, GEN_TYPE_F), GenReg::acc());
-
- // Right part of the 16-wide register now
- if (width == 16) {
- p->curr.noMask = 1;
- GenReg nextSrc0 = src0, nextSrc1 = src1;
- if (src0Scalar == false) nextSrc0 = GenReg::next(src0);
- if (src1Scalar == false) nextSrc1 = GenReg::next(src1);
- p->MUL(GenReg::retype(GenReg::acc(), GEN_TYPE_D), nextSrc0, nextSrc1);
- p->MACH(GenReg::retype(GenReg::null(), GEN_TYPE_D), nextSrc0, nextSrc1);
- p->curr.quarterControl = GEN_COMPRESSION_Q2;
- p->MOV(GenReg::f8grf(116,0), GenReg::acc());
- p->curr.noMask = 0;
- p->MOV(GenReg::retype(GenReg::next(dst), GEN_TYPE_F), GenReg::f8grf(116,0));
- }
+ // Either left part of the 16-wide register or just a simd 8 register
+ dst = GenReg::retype(dst, GEN_TYPE_D);
+ src0 = GenReg::retype(src0, GEN_TYPE_D);
+ src1 = GenReg::retype(src1, GEN_TYPE_D);
+ p->curr.execWidth = 8;
+ p->curr.quarterControl = GEN_COMPRESSION_Q1;
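+ // 32x32 integer multiplies go through the accumulator: MUL computes a
+ // partial product into acc0, MACH completes it and the low 32 bits of the
+ // result are then read back from acc0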
+ p->MUL(GenReg::retype(GenReg::acc(), GEN_TYPE_D), src0, src1);
+ p->MACH(GenReg::retype(GenReg::null(), GEN_TYPE_D), src0, src1);
+ p->MOV(GenReg::retype(dst, GEN_TYPE_F), GenReg::acc());
+
+ // Right part of the 16-wide register now
+ if (width == 16) {
+ p->curr.noMask = 1;
+ const GenReg nextSrc0 = this->genRegQn(insn.getSrc(0), 2, TYPE_S32);
+ const GenReg nextSrc1 = this->genRegQn(insn.getSrc(1), 2, TYPE_S32);
+ p->MUL(GenReg::retype(GenReg::acc(), GEN_TYPE_D), nextSrc0, nextSrc1);
+ p->MACH(GenReg::retype(GenReg::null(), GEN_TYPE_D), nextSrc0, nextSrc1);
+ p->curr.quarterControl = GEN_COMPRESSION_Q2;
+ p->MOV(GenReg::f8grf(116,0), GenReg::acc());
+ p->curr.noMask = 0;
+ p->MOV(GenReg::retype(GenReg::next(dst), GEN_TYPE_F), GenReg::f8grf(116,0));
+ }
+
p->pop();
}
+ void GenContext::emitForwardBranch(const ir::BranchInstruction &insn,
+ ir::LabelIndex dst,
+ ir::LabelIndex src)
+ {
+ using namespace ir;
+ const GenReg ip = this->genReg(blockIPReg, TYPE_U16);
+
+ // Inefficient code. If the instruction is predicated, we build the flag
+ // register from the boolean vector
+ if (insn.isPredicated() == true) {
+ const GenReg pred = this->genReg(insn.getPredicateIndex(), TYPE_U16);
+ p->push();
+ p->curr.noMask = 1;
+ p->curr.execWidth = 1;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->MOV(GenReg::flag(0,1), GenReg::flag(0,0));
+ p->pop();
+
+ // Rebuild the flag register by comparing the booleans with 1
+ p->push();
+ p->curr.flag = 0;
+ p->curr.subFlag = 1;
+ p->CMP(GenReg::null(), GEN_CONDITIONAL_EQ, pred, GenReg::immuw(1));
+
+ // Update the PcIPs
+ p->MOV(ip, GenReg::immuw(uint16_t(dst)));
+ p->pop();
+
+ // We do not emit any jump if the jump target is the next basic block
+ const BasicBlock *curr = insn.getParent();
+ const BasicBlock *next = curr->getNextBlock();
+ const LabelIndex nextLabel = next->getLabelIndex();
+ if (nextLabel == dst) return;
+
+ // It is slightly more complicated than for backward jump. We check that
+ // all PcIPs are greater than the next block IP to be sure that we can
+ // jump
+ p->push();
+ p->curr.flag = 0;
+ p->curr.subFlag = 1;
+ p->CMP(GenReg::null(), GEN_CONDITIONAL_G, ip, GenReg::immuw(uint16_t(nextLabel)));
+
+ // Branch to the jump target
+ this->branchPos.insert(std::make_pair(&insn, p->insnNum));
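+ // The jump is only taken when the comparison holds for *all* lanes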
+ if (simdWidth == 8)
+ p->curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
+ else if (simdWidth == 16)
+ p->curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H;
+ else
+ NOT_SUPPORTED;
+ p->curr.execWidth = 1;
+ p->curr.noMask = 1;
+ p->JMPI(GenReg::immd(0));
+ p->pop();
+
+ } else {
+ // Update the PcIPs
+ p->MOV(ip, GenReg::immuw(uint16_t(dst)));
+
+ // Branch to the jump target
+ this->branchPos.insert(std::make_pair(&insn, p->insnNum));
+ p->push();
+ p->curr.execWidth = 1;
+ p->curr.noMask = 1;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->JMPI(GenReg::immd(0));
+ p->pop();
+ }
+ }
+
+ void GenContext::emitBackwardBranch(const ir::BranchInstruction &insn,
+ ir::LabelIndex dst,
+ ir::LabelIndex src)
+ {
+ using namespace ir;
+ const GenReg ip = this->genReg(blockIPReg, TYPE_U16);
+
+ // Inefficient code. If the instruction is predicated, we build the flag
+ // register from the boolean vector
+ if (insn.isPredicated() == true) {
+ const GenReg pred = this->genReg(insn.getPredicateIndex(), TYPE_U16);
+ p->push();
+ p->curr.noMask = 1;
+ p->curr.execWidth = 1;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->MOV(GenReg::flag(0,1), GenReg::flag(0,0));
+ p->pop();
+
+ // Rebuild the flag register by comparing the booleans with 1
+ p->push();
+ p->curr.flag = 0;
+ p->curr.subFlag = 1;
+ p->CMP(GenReg::null(), GEN_CONDITIONAL_EQ, pred, GenReg::immuw(1));
+
+ // Update the PcIPs
+ p->MOV(ip, GenReg::immuw(uint16_t(dst)));
+
+ // Branch to the jump target
+ this->branchPos.insert(std::make_pair(&insn, p->insnNum));
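+ // The jump is taken as soon as *any* lane must loop back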
+ if (simdWidth == 8)
+ p->curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
+ else if (simdWidth == 16)
+ p->curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
+ else
+ NOT_SUPPORTED;
+ p->curr.execWidth = 1;
+ p->curr.noMask = 1;
+ p->JMPI(GenReg::immd(0));
+ p->pop();
+
+ } else {
+
+ // Update the PcIPs
+ p->MOV(ip, GenReg::immuw(uint16_t(dst)));
+
+ // Branch to the jump target
+ this->branchPos.insert(std::make_pair(&insn, p->insnNum));
+ p->push();
+ p->curr.execWidth = 1;
+ p->curr.noMask = 1;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->JMPI(GenReg::immd(0));
+ p->pop();
+ }
+ }
+
void GenContext::emitBinaryInstruction(const ir::BinaryInstruction &insn) {
using namespace ir;
const Opcode opcode = insn.getOpcode();
const Type type = insn.getType();
- GenReg dst = reg(insn.getDst(0));
- GenReg src0 = reg(insn.getSrc(0));
- GenReg src1 = reg(insn.getSrc(1));
- GBE_ASSERT(isScalarReg(insn.getDst(0)) == false);
-
- // Default type is FLOAT
- GBE_ASSERT(type == TYPE_U32 || type == TYPE_S32 || type == TYPE_FLOAT);
- if (type == TYPE_U32) {
- dst = GenReg::retype(dst, GEN_TYPE_UD);
- src0 = GenReg::retype(src0, GEN_TYPE_UD);
- src1 = GenReg::retype(src1, GEN_TYPE_UD);
- } else if (type == TYPE_S32) {
- dst = GenReg::retype(dst, GEN_TYPE_D);
- src0 = GenReg::retype(src0, GEN_TYPE_D);
- src1 = GenReg::retype(src1, GEN_TYPE_D);
- }
+ GenReg dst = this->genReg(insn.getDst(0), type);
+ GenReg src0 = this->genReg(insn.getSrc(0), type);
+ GenReg src1 = this->genReg(insn.getSrc(1), type);
// Output the binary instruction
switch (opcode) {
void GenContext::emitTernaryInstruction(const ir::TernaryInstruction &insn) {}
void GenContext::emitSelectInstruction(const ir::SelectInstruction &insn) {}
- void GenContext::emitCompareInstruction(const ir::CompareInstruction &insn) {}
- void GenContext::emitConvertInstruction(const ir::ConvertInstruction &insn) {}
- void GenContext::emitBranchInstruction(const ir::BranchInstruction &insn) {
+
+ void GenContext::emitCompareInstruction(const ir::CompareInstruction &insn) {
using namespace ir;
const Opcode opcode = insn.getOpcode();
- GBE_ASSERT(opcode == OP_RET);
+ const Type type = insn.getType();
+ const uint32_t typeSize = getSize(type);
+ const uint32_t genCmp = getGenCompare(opcode);
+ const GenReg dst = this->genReg(insn.getDst(0), TYPE_BOOL);
+ const GenReg src0 = this->genReg(insn.getSrc(0), type);
+ const GenReg src1 = this->genReg(insn.getSrc(1), type);
+
+ // Save the execution mask (flag 0.0) into flag 0.1, which the comparison
+ // below will use and update
p->push();
- p->curr.execWidth = 8;
- p->curr.noMask = 1;
- p->MOV(GenReg::f8grf(127,0), GenReg::f8grf(0,0));
+ p->curr.noMask = 1;
+ p->curr.execWidth = 1;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->MOV(GenReg::flag(0,1), GenReg::flag(0,0));
+ p->pop();
+
+ // Emit the compare instruction now. In SIMD16, dword comparisons require
+ // two SIMD8 instructions
+ GBE_ASSERT(typeSize == 2 || typeSize == 4);
+ p->push();
+ if (this->simdWidth == 8 || typeSize == 2) {
+ p->curr.flag = 0;
+ p->curr.subFlag = 1;
+ p->CMP(GenReg::null(), genCmp, src0, src1);
+ } else if (this->simdWidth == 16) {
+ const GenReg nextSrc0 = this->genRegQn(insn.getSrc(0), 2, type);
+ const GenReg nextSrc1 = this->genRegQn(insn.getSrc(1), 2, type);
+ p->curr.flag = 0;
+ p->curr.subFlag = 1;
+ p->curr.execWidth = 8;
+ p->curr.quarterControl = GEN_COMPRESSION_Q1;
+ p->CMP(GenReg::null(), genCmp, src0, src1);
+ p->curr.quarterControl = GEN_COMPRESSION_Q2;
+ p->CMP(GenReg::null(), genCmp, nextSrc0, nextSrc1);
+ } else
+ NOT_SUPPORTED;
+ p->pop();
+
+ // We emit very unoptimized code that stores the resulting mask in a GRF
+ // (1 for true lanes, 0 for the others)
+ p->push();
+ p->curr.flag = 0;
+ p->curr.subFlag = 1;
+ p->SEL(dst, GenReg::uw1grf(127,0), GenReg::immuw(0));
p->pop();
- p->EOT(127);
+ }
+
+ void GenContext::emitConvertInstruction(const ir::ConvertInstruction &insn) {}
+ void GenContext::emitBranchInstruction(const ir::BranchInstruction &insn) {
+ using namespace ir;
+ const Opcode opcode = insn.getOpcode();
+ if (opcode == OP_RET) {
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.execWidth = 8;
+ p->curr.noMask = 1;
+ p->MOV(GenReg::f8grf(127,0), GenReg::f8grf(0,0));
+ p->EOT(127);
+ p->pop();
+ } else if (opcode == OP_BRA) {
+ const LabelIndex dst = insn.getLabelIndex();
+ const LabelIndex src = insn.getParent()->getLabelIndex();
+
+ // We handle forward and backward branches differently
+ if (uint32_t(dst) <= uint32_t(src))
+ this->emitBackwardBranch(insn, dst, src);
+ else
+ this->emitForwardBranch(insn, dst, src);
+ } else
+ NOT_IMPLEMENTED;
}
void GenContext::emitTextureInstruction(const ir::TextureInstruction &insn) {}
using namespace ir;
const Type type = insn.getType();
const Immediate imm = insn.getImmediate();
- const GenReg dst = reg(insn.getDst(0));
+ const GenReg dst = this->genReg(insn.getDst(0), type);
switch (type) {
- case TYPE_U32: p->MOV(GenReg::retype(dst, GEN_TYPE_UD), GenReg::immud(imm.data.u32)); break;
- case TYPE_S32: p->MOV(GenReg::retype(dst, GEN_TYPE_D), GenReg::immd(imm.data.s32)); break;
+ case TYPE_U32: p->MOV(dst, GenReg::immud(imm.data.u32)); break;
+ case TYPE_S32: p->MOV(dst, GenReg::immd(imm.data.s32)); break;
case TYPE_FLOAT: p->MOV(dst, GenReg::immf(imm.data.f32)); break;
default: NOT_SUPPORTED;
}
GBE_ASSERT(insn.getValueNum() == 1);
GBE_ASSERT(insn.isAligned() == true);
GBE_ASSERT(this->simdWidth <= 16);
- const GenReg address = reg(insn.getAddress());
- const GenReg value = reg(insn.getValue(0));
- if (this->simdWidth == 8 || this->simdWidth == 16)
- p->UNTYPED_READ(value, address, 0, 1);
- else
+ const GenReg address = this->genReg(insn.getAddress());
+ const GenReg value = this->genReg(insn.getValue(0));
+ if (this->simdWidth == 8 || this->simdWidth == 16) {
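+ // Untyped reads expect one address per lane, so a scalar address is first
+ // broadcast into a full register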
+ if (isScalarReg(insn.getAddress()) == true) {
+ if (this->simdWidth == 8) {
+ p->MOV(GenReg::f8grf(112, 0), GenReg::retype(address, GEN_TYPE_F));
+ p->UNTYPED_READ(value, GenReg::f8grf(112, 0), 0, 1);
+ } else if (this->simdWidth == 16) {
+ p->MOV(GenReg::f16grf(112, 0), GenReg::retype(address, GEN_TYPE_F));
+ p->UNTYPED_READ(value, GenReg::f16grf(112, 0), 0, 1);
+ }
+ } else
+ p->UNTYPED_READ(value, address, 0, 1);
+ } else
NOT_IMPLEMENTED;
}
GBE_ASSERT(insn.getValueNum() == 1);
GBE_ASSERT(insn.isAligned() == true);
GBE_ASSERT(this->simdWidth <= 16);
- const GenReg address = reg(insn.getAddress());
- const GenReg value = reg(insn.getValue(0));
+ const GenReg address = this->genReg(insn.getAddress());
+ const GenReg value = this->genReg(insn.getValue(0));
// XXX remove that later. For now we just copy everything to GRFs to make it
// contiguous
if (this->simdWidth == 8) {
}
void GenContext::emitFenceInstruction(const ir::FenceInstruction &insn) {}
void GenContext::emitLabelInstruction(const ir::LabelInstruction &insn) {
+ const ir::LabelIndex label = insn.getLabelIndex();
const GenReg dst = GenReg::retype(GenReg::null(), GEN_TYPE_UW);
- const GenReg src0 = this->reg(blockIPReg);
- const GenReg src1 = GenReg::immuw(insn.getLabelIndex());
-#if 0
- p->push();
- p->curr.predicated = 0;
- p->MOV(GenReg::flag(0,1), GenReg::immuw(0xffff));
- p->pop();
-#endif
+ const GenReg src0 = this->genReg(blockIPReg);
+ const GenReg src1 = GenReg::immuw(label);
+ // Labels are branch targets. We save the position of each label in the
+ // stream
+ this->labelPos.insert(std::make_pair(label, p->insnNum));
+
+ // Emit the mask computation at the head of each basic block: a lane is
+ // active when its per-lane IP is not greater than this label
p->push();
- p->curr.predicated = 0;
- p->curr.flag = 0;
- p->CMP(dst, GEN_CONDITIONAL_LE, src0, src1);
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.flag = 0;
+ p->CMP(dst, GEN_CONDITIONAL_LE, GenReg::retype(src0, GEN_TYPE_UW), src1);
p->pop();
}
void GenContext::emitInstructionStream(void) {
using namespace ir;
+
+ // We keep 1 in a scalar register to make select(pred, 1, 0) faster
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.execWidth = 1;
+ p->MOV(GenReg::uw16grf(127,0), GenReg::immuw(1));
+ p->pop();
+
+ // Emit all the other instructions
fn.foreachInstruction([&](const Instruction &insn) {
const Opcode opcode = insn.getOpcode();
switch (opcode) {
#define DECL_INSN(OPCODE, FAMILY) \
- case OP_##OPCODE: this->emit##FAMILY(cast<FAMILY>(insn)); break;
+ case OP_##OPCODE: this->emit##FAMILY(cast<FAMILY>(insn)); break;
#include "ir/instruction.hxx"
#undef DECL_INSN
}
});
}
+ void GenContext::patchBranches(void) {
+ using namespace ir;
+ for (auto pair : branchPos) {
+ const BranchInstruction *insn = cast<BranchInstruction>(pair.first);
+ const LabelIndex label = insn->getLabelIndex();
+ const int32_t insnID = pair.second;
+ const int32_t targetID = labelPos.find(label)->second;
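+ // JMPI distances are counted from the instruction that follows the jump,
+ // in 64-bit units (two per 128-bit native instruction), hence the -1 and
+ // the factor of 2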
+ p->patchJMPI(insnID, (targetID-insnID-1) * 2);
+ }
+ }
+
BVAR(OCL_OUTPUT_ASM, false);
void GenContext::emitCode(void) {
GenKernel *genKernel = static_cast<GenKernel*>(this->kernel);
this->allocateRegister();
this->emitInstructionStream();
+ this->patchBranches();
genKernel->insnNum = p->insnNum;
genKernel->insns = GBE_NEW_ARRAY(GenInstruction, genKernel->insnNum);
std::memcpy(genKernel->insns, p->store, genKernel->insnNum * sizeof(GenInstruction));
this->curr.execWidth = simdWidth;
this->curr.quarterControl = GEN_COMPRESSION_Q1;
this->curr.noMask = 0;
- this->curr.predicated = 1;
this->curr.flag = 0;
this->curr.subFlag = 0;
+ this->curr.predicate = GEN_PREDICATE_NORMAL;
this->curr.inversePredicate = 0;
}
insn->header.execution_size = GEN_WIDTH_8;
else if (this->curr.execWidth == 16)
insn->header.execution_size = GEN_WIDTH_16;
+ else if (this->curr.execWidth == 1)
+ insn->header.execution_size = GEN_WIDTH_1;
else
NOT_IMPLEMENTED;
insn->header.quarter_control = this->curr.quarterControl;
insn->header.mask_control = this->curr.noMask;
insn->bits2.ia1.flag_reg_nr = this->curr.flag;
insn->bits2.ia1.flag_sub_reg_nr = this->curr.subFlag;
- if (this->curr.predicated) {
- insn->header.predicate_control = GEN_PREDICATE_NORMAL;
+ if (this->curr.predicate != GEN_PREDICATE_NONE) {
+ insn->header.predicate_control = this->curr.predicate;
insn->header.predicate_inverse = this->curr.inversePredicate;
}
}
return insn;
}
- static GenInstruction *brw_alu1(GenEmitter *p,
- uint32_t opcode,
- GenReg dest,
- GenReg src)
+ INLINE GenInstruction *alu1(GenEmitter *p,
+ uint32_t opcode,
+ GenReg dest,
+ GenReg src)
{
GenInstruction *insn = p->next(opcode);
p->setHeader(insn);
return insn;
}
- static GenInstruction *brw_alu2(GenEmitter *p,
- uint32_t opcode,
- GenReg dest,
- GenReg src0,
- GenReg src1)
+ INLINE GenInstruction *alu2(GenEmitter *p,
+ uint32_t opcode,
+ GenReg dest,
+ GenReg src0,
+ GenReg src1)
{
GenInstruction *insn = p->next(opcode);
p->setHeader(insn);
return reg.subnr / 4;
}
- static GenInstruction *brw_alu3(GenEmitter *p,
- uint32_t opcode,
- GenReg dest,
- GenReg src0,
- GenReg src1,
- GenReg src2)
+ static GenInstruction *alu3(GenEmitter *p,
+ uint32_t opcode,
+ GenReg dest,
+ GenReg src0,
+ GenReg src1,
+ GenReg src2)
{
GenInstruction *insn = p->next(opcode);
#define ALU1(OP) \
GenInstruction *GenEmitter::OP(GenReg dest, GenReg src0) \
{ \
- return brw_alu1(this, GEN_OPCODE_##OP, dest, src0); \
+ return alu1(this, GEN_OPCODE_##OP, dest, src0); \
}
#define ALU2(OP) \
GenInstruction *GenEmitter::OP(GenReg dest, GenReg src0, GenReg src1) \
{ \
- return brw_alu2(this, GEN_OPCODE_##OP, dest, src0, src1); \
+ return alu2(this, GEN_OPCODE_##OP, dest, src0, src1); \
}
#define ALU3(OP) \
GenInstruction *GenEmitter::OP(GenReg dest, GenReg src0, GenReg src1, GenReg src2) \
{ \
- return brw_alu3(this, GEN_OPCODE_##OP, dest, src0, src1, src2); \
+ return alu3(this, GEN_OPCODE_##OP, dest, src0, src1, src2); \
}
ALU1(MOV)
GenInstruction *GenEmitter::MACH(GenReg dest, GenReg src0, GenReg src1)
{
- GenInstruction *insn = brw_alu2(this, GEN_OPCODE_MACH, dest, src0, src1);
+ GenInstruction *insn = alu2(this, GEN_OPCODE_MACH, dest, src0, src1);
insn->header.acc_wr_control = 1;
return insn;
}
assert(src0.type != GEN_TYPE_D);
}
- return brw_alu2(this, GEN_OPCODE_ADD, dest, src0, src1);
+ return alu2(this, GEN_OPCODE_ADD, dest, src0, src1);
}
GenInstruction *GenEmitter::MUL(GenReg dest, GenReg src0, GenReg src1)
assert(src1.file != GEN_ARCHITECTURE_REGISTER_FILE ||
src1.nr != GEN_ARF_ACCUMULATOR);
- return brw_alu2(this, GEN_OPCODE_MUL, dest, src0, src1);
+ return alu2(this, GEN_OPCODE_MUL, dest, src0, src1);
}
this->setSrc1(insn, GenReg::immud(0x0));
}
- GenInstruction *GenEmitter::JMPI(GenReg dest, GenReg src0, GenReg src1)
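+ // IP-relative jump. The distance is carried by src1 and patched afterwards
+ // by patchJMPI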
+ void GenEmitter::JMPI(GenReg src)
{
- GenInstruction *insn = brw_alu2(this, GEN_OPCODE_JMPI, dest, src0, src1);
- insn->header.execution_size = 1;
- insn->header.mask_control = GEN_MASK_DISABLE;
- return insn;
+ alu2(this, GEN_OPCODE_JMPI, GenReg::ip(), GenReg::ip(), src);
+ }
+
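+ // Overwrite the src1 immediate of an already emitted JMPI with the final
+ // jump distance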
+ void GenEmitter::patchJMPI(uint32_t insnID, int32_t jumpDistance)
+ {
+ assert(insnID < this->insnNum);
+ GenInstruction &insn = this->store[insnID];
+ assert(insn.header.opcode == GEN_OPCODE_JMPI);
+ this->setSrc1(&insn, GenReg::retype(GenReg::immw(jumpDistance), GEN_TYPE_D));
}
/* To integrate with the above, it makes sense that the comparison
this->setDst(insn, dest);
this->setSrc0(insn, src0);
this->setSrc1(insn, src1);
-#if 0
-
- /* Make it so that future instructions will use the computed flag
- * value until brw_set_predicate_control_flag_value() is called
- * again.
- */
- if (dest.file == GEN_ARCHITECTURE_REGISTER_FILE &&
- dest.nr == 0) {
- this->current->header.predicate_control = GEN_PREDICATE_NORMAL;
- this->flag_value = 0xff;
- }
-#endif
}
/* Issue 'wait' instruction for n1, host could program MMIO