this->liveness = GBE_NEW(ir::Liveness, (ir::Function&) fn);
this->dag = GBE_NEW(ir::FunctionDAG, *this->liveness);
this->partitioner = GBE_NEW(RegisterFilePartitioner);
- this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH);
+ if (fn.getSimdWidth() == 0)
+ this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH);
+ else
+ this->simdWidth = fn.getSimdWidth();
+
}
Context::~Context(void) {
GBE_SAFE_DELETE(this->partitioner);
* \file context.hpp
* \author Benjamin Segovia <benjamin.segovia@intel.com>
*/
-
#ifndef __GBE_CONTEXT_HPP__
#define __GBE_CONTEXT_HPP__
* \file gen_context.cpp
* \author Benjamin Segovia <benjamin.segovia@intel.com>
*/
+
#include "backend/gen_context.hpp"
#include "backend/gen_program.hpp"
#include "backend/gen_defs.hpp"
GBE_DELETE(this->p);
}
- bool GenContext::isScalarOrBool(ir::Register reg) const {
- if (this->isScalarReg(reg))
- return true;
- else {
- const ir::RegisterFamily family = fn.getRegisterFamily(reg);
- return family == ir::FAMILY_BOOL;
- }
- }
-
/*! XXX Make both structures the same! */
INLINE void setInstructionState(GenInstructionState &dst,
const SelectionState &src)
const GenReg src = ra->genReg(insn.src[0]);
switch (insn.opcode) {
case SEL_OP_MOV: p->MOV(dst, src); break;
+ case SEL_OP_NOT: p->NOT(dst, src); break;
default: NOT_IMPLEMENTED;
}
}
const GenReg src0 = ra->genReg(insn.src[0]);
const GenReg src1 = ra->genReg(insn.src[1]);
switch (insn.opcode) {
+ case SEL_OP_SEL: p->SEL(dst, src0, src1); break;
case SEL_OP_AND: p->AND(dst, src0, src1); break;
- case SEL_OP_OR: p->OR(dst, src0, src1); break;
+ case SEL_OP_OR: p->OR (dst, src0, src1); break;
case SEL_OP_XOR: p->XOR(dst, src0, src1); break;
case SEL_OP_SHR: p->SHR(dst, src0, src1); break;
case SEL_OP_SHL: p->SHL(dst, src0, src1); break;
}
}
- void GenContext::emitSelectInstruction(const SelectionInstruction &insn) {
- NOT_IMPLEMENTED;
- }
-
void GenContext::emitNoOpInstruction(const SelectionInstruction &insn) {
NOT_IMPLEMENTED;
}
const GenReg dst = ra->genReg(insn.dst[0]);
const GenReg src0 = ra->genReg(insn.src[0]);
const GenReg src1 = ra->genReg(insn.src[1]);
- const uint32_t function = insn.function;
+ const uint32_t function = insn.extra.function;
p->MATH(dst, function, src0, src1);
}
void GenContext::emitCompareInstruction(const SelectionInstruction &insn) {
const GenReg src0 = ra->genReg(insn.src[0]);
const GenReg src1 = ra->genReg(insn.src[1]);
- p->CMP(insn.function, src0, src1);
+ p->CMP(insn.extra.function, src0, src1);
}
void GenContext::emitJumpInstruction(const SelectionInstruction &insn) {
void GenContext::emitUntypedReadInstruction(const SelectionInstruction &insn) {
const GenReg dst = ra->genReg(insn.dst[0]);
const GenReg src = ra->genReg(insn.src[0]);
- const uint32_t bti = insn.function;
- const uint32_t elemNum = insn.elem;
+ const uint32_t bti = insn.extra.function;
+ const uint32_t elemNum = insn.extra.elem;
p->UNTYPED_READ(dst, src, bti, elemNum);
}
void GenContext::emitUntypedWriteInstruction(const SelectionInstruction &insn) {
const GenReg src = ra->genReg(insn.src[0]);
- const uint32_t bti = insn.function;
- const uint32_t elemNum = insn.elem;
+ const uint32_t bti = insn.extra.function;
+ const uint32_t elemNum = insn.extra.elem;
p->UNTYPED_WRITE(src, bti, elemNum);
}
void GenContext::emitByteGatherInstruction(const SelectionInstruction &insn) {
const GenReg dst = ra->genReg(insn.dst[0]);
const GenReg src = ra->genReg(insn.src[0]);
- const uint32_t bti = insn.function;
- const uint32_t elemSize = insn.elem;
+ const uint32_t bti = insn.extra.function;
+ const uint32_t elemSize = insn.extra.elem;
p->BYTE_GATHER(dst, src, bti, elemSize);
}
void GenContext::emitByteScatterInstruction(const SelectionInstruction &insn) {
const GenReg src = ra->genReg(insn.src[0]);
- const uint32_t bti = insn.function;
- const uint32_t elemSize = insn.elem;
+ const uint32_t bti = insn.extra.function;
+ const uint32_t elemSize = insn.extra.elem;
p->BYTE_SCATTER(src, bti, elemSize);
}
+ void GenContext::emitRegionInstruction(const SelectionInstruction &insn) {
+ GBE_ASSERT(insn.dst[0].width == GEN_WIDTH_8 ||
+ insn.dst[0].width == GEN_WIDTH_16);
+ const GenReg src = ra->genReg(insn.src[0]);
+ const GenReg dst = ra->genReg(insn.dst[1]);
+ const GenReg final = ra->genReg(insn.dst[0]);
+
+ // Region dimensions
+ const uint32_t offset = insn.extra.offset;
+ const uint32_t width = insn.extra.width;
+ const uint32_t height = simdWidth / insn.extra.width;
+ const uint32_t vstride = insn.extra.vstride;
+ const uint32_t hstride = insn.extra.hstride;
+
+ // Region spanning in the grf
+ const uint32_t start = src.nr * GEN_REG_SIZE + src.subnr + offset * sizeof(int);
+ const uint32_t end = start + insn.srcNum * simdWidth * sizeof(int);
+ GBE_ASSERT(simdWidth % width == 0);
+
+ // Right now we simply emit scalar MOVs instead of the region
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.execWidth = 1;
+ p->curr.noMask = 1;
+ uint32_t dstOffset = dst.nr * GEN_REG_SIZE + dst.subnr;
+ for (uint32_t y = 0; y < height; ++y) {
+ uint32_t srcOffset = start + sizeof(int) * vstride * y;
+ for (uint32_t x = 0; x < width; ++x,
+ srcOffset += sizeof(int) * hstride,
+ dstOffset += sizeof(int))
+ {
+ const uint32_t dstnr = dstOffset / GEN_REG_SIZE;
+ const uint32_t dstsubnr = (dstOffset % GEN_REG_SIZE) / sizeof(int);
+ const GenReg dstReg = GenReg::f1grf(dstnr, dstsubnr);
+ if (srcOffset + sizeof(int) > end)
+ p->MOV(dstReg, GenReg::immf(0.f));
+ else {
+ GBE_ASSERT(srcOffset % sizeof(int) == 0);
+ const uint32_t srcnr = srcOffset / GEN_REG_SIZE;
+ const uint32_t srcsubnr = (srcOffset % GEN_REG_SIZE) / sizeof(int);
+ const GenReg srcReg = GenReg::f1grf(srcnr, srcsubnr);
+ p->MOV(dstReg, srcReg);
+ }
+ }
+ }
+ p->pop();
+ p->MOV(GenReg::retype(final, GEN_TYPE_F), GenReg::retype(dst, GEN_TYPE_F));
+ }
+
+ void GenContext::emitRGatherInstruction(const SelectionInstruction &insn) {
+ const GenReg index0 = GenReg::retype(ra->genReg(insn.src[0]), GEN_TYPE_UW);
+ const GenReg dst0 = GenReg::retype(ra->genReg(insn.dst[0]), GEN_TYPE_F);
+ const GenReg src = ra->genReg(insn.src[1]);
+ const uint32_t offset = src.nr * GEN_REG_SIZE + src.subnr;
+ p->push();
+ p->curr.execWidth = 8;
+ p->SHL(GenReg::addr8(0), index0, GenReg::immuw(2));
+ p->ADD(GenReg::addr8(0), GenReg::addr8(0), GenReg::immuw(offset));
+ p->MOV(dst0, GenReg::indirect(GEN_TYPE_F, 0, GEN_WIDTH_8));
+ p->pop();
+
+ if (simdWidth == 16) {
+ const GenReg dst1 = GenReg::Qn(dst0, 1);
+ const GenReg index1 = GenReg::Qn(index0, 1);
+ p->push();
+ p->curr.execWidth = 8;
+ p->curr.quarterControl = GEN_COMPRESSION_Q2;
+ p->SHL(GenReg::addr8(0), index1, GenReg::immuw(2));
+ p->ADD(GenReg::addr8(0), GenReg::addr8(0), GenReg::immuw(offset));
+ p->MOV(dst1, GenReg::indirect(GEN_TYPE_F, 0, GEN_WIDTH_8));
+ p->pop();
+ }
+ }
+
+ void GenContext::emitOBReadInstruction(const SelectionInstruction &insn) {
+ const GenReg dst = ra->genReg(insn.dst[0]);
+ const GenReg addr = ra->genReg(insn.src[0]);
+ const GenReg first = GenReg::ud1grf(addr.nr,addr.subnr/sizeof(float));
+ GenReg header;
+ if (simdWidth == 8)
+ header = GenReg::retype(ra->genReg(insn.src[1]), GEN_TYPE_F);
+ else
+ header = GenReg::retype(GenReg::Qn(ra->genReg(insn.src[1]),1), GEN_TYPE_F);
+
+ p->push();
+ // Copy r0 into the header first
+ p->curr.execWidth = 8;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ p->MOV(header, GenReg::f8grf(0,0));
+
+ // Update the header with the current address
+ p->curr.execWidth = 1;
+ const uint32_t nr = header.nr;
+ const uint32_t subnr = header.subnr / sizeof(float);
+ p->SHR(GenReg::ud1grf(nr, subnr+2), first, GenReg::immud(4));
+
+ // Put zero in the general state base address
+ p->MOV(GenReg::f1grf(nr, subnr+5), GenReg::immf(0));
+
+ // Now read the data
+ p->OBREAD(dst, header, insn.extra.function, insn.extra.elem);
+ p->pop();
+ }
+
+ void GenContext::emitOBWriteInstruction(const SelectionInstruction &insn) {
+ const GenReg addr = ra->genReg(insn.src[2]);
+ const GenReg first = GenReg::ud1grf(addr.nr,addr.subnr/sizeof(float));
+ GenReg header;
+ if (simdWidth == 8)
+ header = GenReg::retype(ra->genReg(insn.src[0]), GEN_TYPE_F);
+ else
+ header = GenReg::retype(GenReg::Qn(ra->genReg(insn.src[0]),1), GEN_TYPE_F);
+
+ p->push();
+ // Copy r0 into the header first
+ p->curr.execWidth = 8;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ p->MOV(header, GenReg::f8grf(0,0));
+
+ // Update the header with the current address
+ p->curr.execWidth = 1;
+ const uint32_t nr = header.nr;
+ const uint32_t subnr = header.subnr / sizeof(float);
+ p->SHR(GenReg::ud1grf(nr, subnr+2), first, GenReg::immud(4));
+
+ // Put zero in the general state base address
+ p->MOV(GenReg::f1grf(nr, subnr+5), GenReg::immf(0));
+
+ // Now read the data
+ p->OBWRITE(header, insn.extra.function, insn.extra.elem);
+ p->pop();
+ }
+
BVAR(OCL_OUTPUT_ASM, false);
void GenContext::emitCode(void) {
GenKernel *genKernel = static_cast<GenKernel*>(this->kernel);
void emitInstructionStream(void);
/*! Set the correct target values for the branches */
void patchBranches(void);
- /*! Bool registers will use scalar words. So we will consider them as
- * scalars in Gen backend
- */
- bool isScalarOrBool(ir::Register reg) const;
/*! Forward ir::Function method */
INLINE bool isSpecialReg(ir::Register reg) const {
return fn.isSpecialReg(reg);
void emitLabelInstruction(const SelectionInstruction &insn);
void emitUnaryInstruction(const SelectionInstruction &insn);
void emitBinaryInstruction(const SelectionInstruction &insn);
- void emitSelectInstruction(const SelectionInstruction &insn);
void emitCompareInstruction(const SelectionInstruction &insn);
void emitJumpInstruction(const SelectionInstruction &insn);
void emitEotInstruction(const SelectionInstruction &insn);
void emitUntypedWriteInstruction(const SelectionInstruction &insn);
void emitByteGatherInstruction(const SelectionInstruction &insn);
void emitByteScatterInstruction(const SelectionInstruction &insn);
+ void emitRegionInstruction(const SelectionInstruction &insn);
+ void emitRGatherInstruction(const SelectionInstruction &insn);
+ void emitOBReadInstruction(const SelectionInstruction &insn);
+ void emitOBWriteInstruction(const SelectionInstruction &insn);
/*! Implements base class */
virtual Kernel *allocateKernel(void);
#define GEN_UNTYPED_WRITE 13
#define GEN_BYTE_GATHER 4
#define GEN_BYTE_SCATTER 12
+#define GEN_OBLOCK_READ 0
+#define GEN_OBLOCK_WRITE 8
/* For byte scatters and gathers, the element to write */
#define GEN_BYTE_SCATTER_BYTE 0
uint32_t end_of_thread:1;
} gen7_byte_rw;
+ /*! Data port OBlock read / write */
+ struct {
+ uint32_t bti:8;
+ uint32_t block_size:3;
+ uint32_t ignored:2;
+ uint32_t invalidate_after_read:1;
+ uint32_t msg_type:4;
+ uint32_t category:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } gen7_oblock_rw;
+
struct {
uint32_t src1_subreg_nr_high:1;
uint32_t src1_reg_nr:8;
*/
/**
- * \file gen_eu.hpp
+ * \file gen_encoder.hpp
* \author Benjamin Segovia <benjamin.segovia@intel.com>
* This is a revamped Gen ISA encoder from Mesa code base
*/
return false;
}
+ static void setMessageDescriptor(GenEncoder *p,
+ GenInstruction *inst,
+ enum GenMessageTarget sfid,
+ unsigned msg_length,
+ unsigned response_length,
+ bool header_present = false,
+ bool end_of_thread = false)
+ {
+ p->setSrc1(inst, GenReg::immd(0));
+ inst->bits3.generic_gen5.header_present = header_present;
+ inst->bits3.generic_gen5.response_length = response_length;
+ inst->bits3.generic_gen5.msg_length = msg_length;
+ inst->bits3.generic_gen5.end_of_thread = end_of_thread;
+ inst->header.destreg_or_condmod = sfid;
+ }
+
+ static void setDPUntypedRW(GenEncoder *p,
+ GenInstruction *insn,
+ uint32_t bti,
+ uint32_t rgba,
+ uint32_t msg_type,
+ uint32_t msg_length,
+ uint32_t response_length)
+ {
+ const GenMessageTarget sfid = GEN_SFID_DATAPORT_DATA_CACHE;
+ setMessageDescriptor(p, insn, sfid, msg_length, response_length);
+ insn->bits3.gen7_untyped_rw.msg_type = msg_type;
+ insn->bits3.gen7_untyped_rw.bti = bti;
+ insn->bits3.gen7_untyped_rw.rgba = rgba;
+ if (p->curr.execWidth == 8)
+ insn->bits3.gen7_untyped_rw.simd_mode = GEN_UNTYPED_SIMD8;
+ else if (p->curr.execWidth == 16)
+ insn->bits3.gen7_untyped_rw.simd_mode = GEN_UNTYPED_SIMD16;
+ else
+ NOT_SUPPORTED;
+ }
+
+ static void setDPByteScatterGather(GenEncoder *p,
+ GenInstruction *insn,
+ uint32_t bti,
+ uint32_t elem_size,
+ uint32_t msg_type,
+ uint32_t msg_length,
+ uint32_t response_length)
+ {
+ const GenMessageTarget sfid = GEN_SFID_DATAPORT_DATA_CACHE;
+ setMessageDescriptor(p, insn, sfid, msg_length, response_length);
+ insn->bits3.gen7_byte_rw.msg_type = msg_type;
+ insn->bits3.gen7_byte_rw.bti = bti;
+ insn->bits3.gen7_byte_rw.data_size = elem_size;
+ if (p->curr.execWidth == 8)
+ insn->bits3.gen7_byte_rw.simd_mode = GEN_BYTE_SCATTER_SIMD8;
+ else if (p->curr.execWidth == 16)
+ insn->bits3.gen7_byte_rw.simd_mode = GEN_BYTE_SCATTER_SIMD16;
+ else
+ NOT_SUPPORTED;
+ }
+
+ static void setOBlockRW(GenEncoder *p,
+ GenInstruction *insn,
+ uint32_t bti,
+ uint32_t size,
+ uint32_t msg_type,
+ uint32_t msg_length,
+ uint32_t response_length)
+ {
+ const GenMessageTarget sfid = GEN_SFID_DATAPORT_DATA_CACHE;
+ setMessageDescriptor(p, insn, sfid, msg_length, response_length);
+ assert(size == 2 || size == 4);
+ insn->bits3.gen7_oblock_rw.msg_type = msg_type;
+ insn->bits3.gen7_oblock_rw.bti = bti;
+ insn->bits3.gen7_oblock_rw.block_size = size == 2 ? 2 : 3;
+ insn->bits3.gen7_oblock_rw.header_present = 1;
+ }
+
+ static void setSamplerMessage(GenEncoder *p,
+ GenInstruction *insn,
+ uint32_t bti,
+ uint32_t sampler,
+ uint32_t msg_type,
+ uint32_t response_length,
+ uint32_t msg_length,
+ uint32_t header,
+ uint32_t simd_mode,
+ uint32_t return_format)
+ {
+ const GenMessageTarget sfid = GEN_SFID_SAMPLER;
+ setMessageDescriptor(p, insn, sfid, msg_length, response_length, header);
+ insn->bits3.sampler_gen7.bti = bti;
+ insn->bits3.sampler_gen7.sampler = sampler;
+ insn->bits3.sampler_gen7.msg_type = msg_type;
+ insn->bits3.sampler_gen7.simd_mode = simd_mode;
+ }
+
//////////////////////////////////////////////////////////////////////////
// Gen Emitter encoding class
//////////////////////////////////////////////////////////////////////////
if (reg.file != GEN_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
- insn->bits1.da1.src0_reg_file = reg.file;
- insn->bits1.da1.src0_reg_type = reg.type;
- insn->bits2.da1.src0_abs = reg.absolute;
- insn->bits2.da1.src0_negate = reg.negation;
- insn->bits2.da1.src0_address_mode = reg.address_mode;
+ if (reg.address_mode == GEN_ADDRESS_DIRECT) {
+ insn->bits1.da1.src0_reg_file = reg.file;
+ insn->bits1.da1.src0_reg_type = reg.type;
+ insn->bits2.da1.src0_abs = reg.absolute;
+ insn->bits2.da1.src0_negate = reg.negation;
+ insn->bits2.da1.src0_address_mode = reg.address_mode;
- if (reg.file == GEN_IMMEDIATE_VALUE) {
- insn->bits3.ud = reg.dw1.ud;
+ if (reg.file == GEN_IMMEDIATE_VALUE) {
+ insn->bits3.ud = reg.dw1.ud;
- /* Required to set some fields in src1 as well: */
- insn->bits1.da1.src1_reg_file = 0; /* arf */
- insn->bits1.da1.src1_reg_type = reg.type;
- }
- else {
- if (insn->header.access_mode == GEN_ALIGN_1) {
- insn->bits2.da1.src0_subreg_nr = reg.subnr;
- insn->bits2.da1.src0_reg_nr = reg.nr;
- } else {
- insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
- insn->bits2.da16.src0_reg_nr = reg.nr;
- }
-
- if (reg.width == GEN_WIDTH_1 &&
- insn->header.execution_size == GEN_WIDTH_1) {
- insn->bits2.da1.src0_horiz_stride = GEN_HORIZONTAL_STRIDE_0;
- insn->bits2.da1.src0_width = GEN_WIDTH_1;
- insn->bits2.da1.src0_vert_stride = GEN_VERTICAL_STRIDE_0;
+ /* Required to set some fields in src1 as well: */
+ insn->bits1.da1.src1_reg_file = 0; /* arf */
+ insn->bits1.da1.src1_reg_type = reg.type;
}
else {
- insn->bits2.da1.src0_horiz_stride = reg.hstride;
- insn->bits2.da1.src0_width = reg.width;
- insn->bits2.da1.src0_vert_stride = reg.vstride;
+ if (insn->header.access_mode == GEN_ALIGN_1) {
+ insn->bits2.da1.src0_subreg_nr = reg.subnr;
+ insn->bits2.da1.src0_reg_nr = reg.nr;
+ } else {
+ insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+ insn->bits2.da16.src0_reg_nr = reg.nr;
+ }
+
+ if (reg.width == GEN_WIDTH_1 &&
+ insn->header.execution_size == GEN_WIDTH_1) {
+ insn->bits2.da1.src0_horiz_stride = GEN_HORIZONTAL_STRIDE_0;
+ insn->bits2.da1.src0_width = GEN_WIDTH_1;
+ insn->bits2.da1.src0_vert_stride = GEN_VERTICAL_STRIDE_0;
+ }
+ else {
+ insn->bits2.da1.src0_horiz_stride = reg.hstride;
+ insn->bits2.da1.src0_width = reg.width;
+ insn->bits2.da1.src0_vert_stride = reg.vstride;
+ }
}
- }
+ } else {
+ insn->bits1.ia1.src0_reg_file = GEN_GENERAL_REGISTER_FILE;
+ insn->bits1.ia1.src0_reg_type = reg.type;
+ insn->bits2.ia1.src0_subreg_nr = 0;
+ insn->bits2.ia1.src0_indirect_offset = 0;
+ insn->bits2.ia1.src0_abs = 0;
+ insn->bits2.ia1.src0_negate = 0;
+ insn->bits2.ia1.src0_address_mode = reg.address_mode;
+ insn->bits2.ia1.src0_horiz_stride = GEN_HORIZONTAL_STRIDE_0;
+ insn->bits2.ia1.src0_width = GEN_WIDTH_1;
+ insn->bits2.ia1.src0_vert_stride = GEN_VERTICAL_STRIDE_ONE_DIMENSIONAL;
+ }
}
-
void GenEncoder::setSrc1(GenInstruction *insn, GenReg reg) {
assert(reg.nr < 128);
}
}
- static void
- brw_set_message_descriptor(GenEncoder *p,
- GenInstruction *inst,
- enum GenMessageTarget sfid,
- unsigned msg_length,
- unsigned response_length,
- bool header_present = false,
- bool end_of_thread = false)
- {
- p->setSrc1(inst, GenReg::immd(0));
- inst->bits3.generic_gen5.header_present = header_present;
- inst->bits3.generic_gen5.response_length = response_length;
- inst->bits3.generic_gen5.msg_length = msg_length;
- inst->bits3.generic_gen5.end_of_thread = end_of_thread;
- inst->header.destreg_or_condmod = sfid;
- }
-
- static void
- set_dp_untyped_rw(GenEncoder *p,
- GenInstruction *insn,
- uint32_t bti,
- uint32_t rgba,
- uint32_t msg_type,
- uint32_t msg_length,
- uint32_t response_length)
- {
- GenMessageTarget sfid = GEN_SFID_DATAPORT_DATA_CACHE;
- brw_set_message_descriptor(p, insn, sfid, msg_length, response_length);
- insn->bits3.gen7_untyped_rw.msg_type = msg_type;
- insn->bits3.gen7_untyped_rw.bti = bti;
- insn->bits3.gen7_untyped_rw.rgba = rgba;
- if (p->curr.execWidth == 8)
- insn->bits3.gen7_untyped_rw.simd_mode = GEN_UNTYPED_SIMD8;
- else if (p->curr.execWidth == 16)
- insn->bits3.gen7_untyped_rw.simd_mode = GEN_UNTYPED_SIMD16;
- else
- NOT_SUPPORTED;
- }
-
- static void
- set_dp_byte_scatter_gather(GenEncoder *p,
- GenInstruction *insn,
- uint32_t bti,
- uint32_t elem_size,
- uint32_t msg_type,
- uint32_t msg_length,
- uint32_t response_length)
- {
- GenMessageTarget sfid = GEN_SFID_DATAPORT_DATA_CACHE;
- brw_set_message_descriptor(p, insn, sfid, msg_length, response_length);
- insn->bits3.gen7_byte_rw.msg_type = msg_type;
- insn->bits3.gen7_byte_rw.bti = bti;
- insn->bits3.gen7_byte_rw.data_size = elem_size;
- if (p->curr.execWidth == 8)
- insn->bits3.gen7_byte_rw.simd_mode = GEN_BYTE_SCATTER_SIMD8;
- else if (p->curr.execWidth == 16)
- insn->bits3.gen7_byte_rw.simd_mode = GEN_BYTE_SCATTER_SIMD16;
- else
- NOT_SUPPORTED;
- }
-
static const uint32_t untypedRWMask[] = {
GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN|GEN_UNTYPED_RED,
GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN,
0
};
- void
- GenEncoder::UNTYPED_READ(GenReg dst, GenReg src, uint32_t bti, uint32_t elemNum) {
+ void GenEncoder::UNTYPED_READ(GenReg dst, GenReg src, uint32_t bti, uint32_t elemNum) {
GenInstruction *insn = this->next(GEN_OPCODE_SEND);
assert(elemNum >= 1 || elemNum <= 4);
uint32_t msg_length = 0;
this->setDst(insn, GenReg::uw16grf(dst.nr, 0));
this->setSrc0(insn, GenReg::ud8grf(src.nr, 0));
this->setSrc1(insn, GenReg::immud(0));
- set_dp_untyped_rw(this,
- insn,
- bti,
- untypedRWMask[elemNum],
- GEN_UNTYPED_READ,
- msg_length,
- response_length);
+ setDPUntypedRW(this,
+ insn,
+ bti,
+ untypedRWMask[elemNum],
+ GEN_UNTYPED_READ,
+ msg_length,
+ response_length);
}
- void
- GenEncoder::UNTYPED_WRITE(GenReg msg, uint32_t bti, uint32_t elemNum) {
+ void GenEncoder::UNTYPED_WRITE(GenReg msg, uint32_t bti, uint32_t elemNum) {
GenInstruction *insn = this->next(GEN_OPCODE_SEND);
assert(elemNum >= 1 || elemNum <= 4);
uint32_t msg_length = 0;
NOT_IMPLEMENTED;
this->setSrc0(insn, GenReg::ud8grf(msg.nr, 0));
this->setSrc1(insn, GenReg::immud(0));
- set_dp_untyped_rw(this,
- insn,
- bti,
- untypedRWMask[elemNum],
- GEN_UNTYPED_WRITE,
- msg_length,
- response_length);
+ setDPUntypedRW(this,
+ insn,
+ bti,
+ untypedRWMask[elemNum],
+ GEN_UNTYPED_WRITE,
+ msg_length,
+ response_length);
}
- void
- GenEncoder::BYTE_GATHER(GenReg dst, GenReg src, uint32_t bti, uint32_t elemSize) {
+ void GenEncoder::BYTE_GATHER(GenReg dst, GenReg src, uint32_t bti, uint32_t elemSize) {
GenInstruction *insn = this->next(GEN_OPCODE_SEND);
uint32_t msg_length = 0;
uint32_t response_length = 0;
this->setDst(insn, GenReg::uw16grf(dst.nr, 0));
this->setSrc0(insn, GenReg::ud8grf(src.nr, 0));
this->setSrc1(insn, GenReg::immud(0));
- set_dp_byte_scatter_gather(this,
- insn,
- bti,
- elemSize,
- GEN_BYTE_GATHER,
- msg_length,
- response_length);
+ setDPByteScatterGather(this,
+ insn,
+ bti,
+ elemSize,
+ GEN_BYTE_GATHER,
+ msg_length,
+ response_length);
}
- void
- GenEncoder::BYTE_SCATTER(GenReg msg, uint32_t bti, uint32_t elemSize) {
+ void GenEncoder::BYTE_SCATTER(GenReg msg, uint32_t bti, uint32_t elemSize) {
GenInstruction *insn = this->next(GEN_OPCODE_SEND);
uint32_t msg_length = 0;
uint32_t response_length = 0;
NOT_IMPLEMENTED;
this->setSrc0(insn, GenReg::ud8grf(msg.nr, 0));
this->setSrc1(insn, GenReg::immud(0));
- set_dp_byte_scatter_gather(this,
- insn,
- bti,
- elemSize,
- GEN_BYTE_SCATTER,
- msg_length,
- response_length);
- }
-
- static void
- set_sampler_message(GenEncoder *p,
- GenInstruction *insn,
- uint32_t bti,
- uint32_t sampler,
- uint32_t msg_type,
- uint32_t response_length,
- uint32_t msg_length,
- uint32_t header_present,
- uint32_t simd_mode,
- uint32_t return_format)
- {
- brw_set_message_descriptor(p, insn, GEN_SFID_SAMPLER, msg_length,
- response_length, header_present);
- insn->bits3.sampler_gen7.bti = bti;
- insn->bits3.sampler_gen7.sampler = sampler;
- insn->bits3.sampler_gen7.msg_type = msg_type;
- insn->bits3.sampler_gen7.simd_mode = simd_mode;
+ setDPByteScatterGather(this,
+ insn,
+ bti,
+ elemSize,
+ GEN_BYTE_SCATTER,
+ msg_length,
+ response_length);
}
GenInstruction *GenEncoder::next(uint32_t opcode) {
#define ALU3(OP) \
void GenEncoder::OP(GenReg dest, GenReg src0, GenReg src1, GenReg src2) { \
- alu3(this, GEN_OPCODE_##OP, dest, src0, src1, src2); \
+ alu3(this, GEN_OPCODE_##OP, dest, src0, src1, src2); \
}
ALU1(MOV)
}
void GenEncoder::MUL(GenReg dest, GenReg src0, GenReg src1) {
- /* 6.32.38: mul */
if (src0.type == GEN_TYPE_D ||
src0.type == GEN_TYPE_UD ||
src1.type == GEN_TYPE_D ||
- src1.type == GEN_TYPE_UD) {
+ src1.type == GEN_TYPE_UD)
assert(dest.type != GEN_TYPE_F);
- }
if (src0.type == GEN_TYPE_F ||
(src0.file == GEN_IMMEDIATE_VALUE &&
this->setHeader(insn);
this->setDst(insn, dest);
this->setSrc0(insn, src0);
- set_sampler_message(this,
- insn,
- bti,
- sampler,
- msg_type,
- response_length,
- msg_length,
- header_present,
- simd_mode,
- return_format);
+ setSamplerMessage(this,
+ insn,
+ bti,
+ sampler,
+ msg_type,
+ response_length,
+ msg_length,
+ header_present,
+ simd_mode,
+ return_format);
+ }
+
+ void GenEncoder::OBREAD(GenReg dst, GenReg header, uint32_t bti, uint32_t size) {
+ GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ const uint32_t msg_length = 1;
+ const uint32_t response_length = size / 2; // Size is in owords
+ this->setHeader(insn);
+ this->setDst(insn, GenReg::uw16grf(dst.nr, 0));
+ this->setSrc0(insn, GenReg::ud8grf(header.nr, 0));
+ this->setSrc1(insn, GenReg::immud(0));
+ insn->header.execution_size = response_length == 1 ? GEN_WIDTH_8 : GEN_WIDTH_16;
+ setOBlockRW(this,
+ insn,
+ bti,
+ size,
+ GEN_OBLOCK_READ,
+ msg_length,
+ response_length);
+ }
+
+ void GenEncoder::OBWRITE(GenReg header, uint32_t bti, uint32_t size) {
+ GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ const uint32_t msg_length = 1 + size / 2; // Size is in owords
+ const uint32_t response_length = 0;
+ this->setHeader(insn);
+ this->setSrc0(insn, GenReg::ud8grf(header.nr, 0));
+ this->setSrc1(insn, GenReg::immud(0));
+ this->setDst(insn, GenReg::retype(GenReg::null(), GEN_TYPE_UW));
+ insn->header.execution_size = msg_length == 2 ? GEN_WIDTH_8 : GEN_WIDTH_16;
+ setOBlockRW(this,
+ insn,
+ bti,
+ size,
+ GEN_OBLOCK_WRITE,
+ msg_length,
+ response_length);
}
void GenEncoder::EOT(uint32_t msg) {
this->address_mode = GEN_ADDRESS_DIRECT;
}
+ /*! Build an indirectly addressed source */
+ static INLINE GenReg indirect(uint32_t type, uint32_t subnr, uint32_t width) {
+ GenReg reg;
+ reg.type = type;
+ reg.file = GEN_GENERAL_REGISTER_FILE;
+ reg.address_mode = GEN_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ reg.width = width;
+ reg.subnr = subnr;
+ reg.nr = 0;
+ reg.negation = 0;
+ reg.absolute = 0;
+ reg.vstride = 0;
+ reg.hstride = 0;
+ return reg;
+ }
+
static INLINE GenReg Qn(GenReg reg, uint32_t quarter) {
if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register
return reg;
else {
const uint32_t typeSz = typeSize(reg.type);
const uint32_t horizontal = stride(reg.hstride);
- const uint32_t grfOffset = reg.nr*GEN_REG_SIZE + typeSz*reg.subnr;
- const uint32_t nextOffset = grfOffset + 8*quarter*typeSz*horizontal;
+ const uint32_t grfOffset = reg.nr*GEN_REG_SIZE + reg.subnr;
+ const uint32_t nextOffset = grfOffset + 8*quarter*horizontal*typeSz;
reg.nr = nextOffset / GEN_REG_SIZE;
- reg.subnr = (nextOffset % GEN_REG_SIZE) / typeSz;
+ reg.subnr = (nextOffset % GEN_REG_SIZE);
return reg;
}
}
return uw1(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_MASK, subnr);
}
+ static INLINE GenReg addr1(uint32_t subnr) {
+ return uw1(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_ADDRESS, subnr);
+ }
+
+ static INLINE GenReg addr8(uint32_t subnr) {
+ return uw8(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_ADDRESS, subnr);
+ }
+
static INLINE GenReg next(GenReg reg) {
reg.nr++;
return reg;
void BYTE_GATHER(GenReg dst, GenReg src, uint32_t bti, uint32_t elemSize);
/*! Byte scatter (for unaligned bytes, shorts and ints) */
void BYTE_SCATTER(GenReg src, uint32_t bti, uint32_t elemSize);
+ /*! OBlock read */
+ void OBREAD(GenReg dst, GenReg header, uint32_t bti, uint32_t elemSize);
+ /*! OBlock read */
+ void OBWRITE(GenReg header, uint32_t bti, uint32_t elemSize);
/*! Send instruction for the sampler */
void SAMPLE(GenReg dest,
uint32_t msg_reg_nr,
}
#define SEL_REG(SIMD16, SIMD8, SIMD1) \
- if (ctx.isScalarOrBool(reg) == true) \
+ if (ctx.sel->isScalarOrBool(reg) == true) \
return SelectionReg::retype(SelectionReg::SIMD1(reg), genType); \
else if (simdWidth == 8) \
return SelectionReg::retype(SelectionReg::SIMD8(reg), genType); \
SelectionInstruction *insn = this->appendInsn();
insn->src[0] = src0;
insn->src[1] = src1;
- insn->function = conditional;
+ insn->extra.function = conditional;
insn->opcode = SEL_OP_CMP;
insn->state = this->curr;
insn->srcNum = 2;
for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
insn->dst[elemID] = dst[elemID];
insn->src[0] = addr;
- insn->function = bti;
- insn->elem = elemNum;
+ insn->extra.function = bti;
+ insn->extra.elem = elemNum;
insn->state = this->curr;
insn->srcNum = 1;
insn->dstNum = elemNum;
srcVector->reg = insn->src;
}
- void Selection::UNTYPED_WRITE(Reg addr,
+ void Selection::UNTYPED_WRITE(Reg addr,
const SelectionReg *src,
uint32_t elemNum,
uint32_t bti)
- {
+ {
SelectionInstruction *insn = this->appendInsn();
SelectionVector *vector = this->appendVector();
insn->src[0] = addr;
for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
insn->src[elemID+1] = src[elemID];
- insn->function = bti;
- insn->elem = elemNum;
+ insn->extra.function = bti;
+ insn->extra.elem = elemNum;
insn->state = this->curr;
insn->srcNum = elemNum+1;
insn->dstNum = 0;
insn->opcode = SEL_OP_BYTE_GATHER;
insn->src[0] = addr;
insn->dst[0] = dst;
- insn->function = bti;
- insn->elem = elemSize;
+ insn->extra.function = bti;
+ insn->extra.elem = elemSize;
insn->state = this->curr;
insn->srcNum = 1;
insn->dstNum = 1;
insn->opcode = SEL_OP_BYTE_SCATTER;
insn->src[0] = addr;
insn->src[1] = src;
- insn->function = bti;
- insn->elem = elemSize;
+ insn->extra.function = bti;
+ insn->extra.elem = elemSize;
insn->state = this->curr;
insn->srcNum = 2;
insn->dstNum = 0;
insn->dst[0] = dst;
insn->src[0] = src0;
insn->src[1] = src1;
- insn->function = function;
+ insn->extra.function = function;
insn->state = this->curr;
insn->srcNum = 2;
insn->dstNum = 1;
insn->dstNum = 1;
}
+ void Selection::REGION(Reg dst0, Reg dst1, const SelectionReg *src,
+ uint32_t offset, uint32_t vstride,
+ uint32_t width, uint32_t hstride,
+ uint32_t srcNum)
+ {
+ SelectionInstruction *insn = this->appendInsn();
+ SelectionVector *vector = this->appendVector();
+
+ // Instruction to encode
+ insn->opcode = SEL_OP_REGION;
+ insn->dst[0] = dst0;
+ insn->dst[1] = dst1;
+ GBE_ASSERT(srcNum <= SelectionInstruction::MAX_SRC_NUM);
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID)
+ insn->src[srcID] = src[srcID];
+ insn->state = this->curr;
+ insn->srcNum = srcNum;
+ insn->dstNum = 2;
+ insn->extra.vstride = vstride;
+ insn->extra.width = width;
+ insn->extra.offset = offset;
+ insn->extra.hstride = hstride;
+
+ // Regioning requires contiguous allocation for the sources
+ vector->regNum = srcNum;
+ vector->reg = insn->src;
+ vector->isSrc = 1;
+ }
+
+ void Selection::RGATHER(Reg dst, const SelectionReg *src, uint32_t srcNum)
+ {
+ SelectionInstruction *insn = this->appendInsn();
+ SelectionVector *vector = this->appendVector();
+
+ // Instruction to encode
+ insn->opcode = SEL_OP_RGATHER;
+ insn->dst[0] = dst;
+ GBE_ASSERT(srcNum <= SelectionInstruction::MAX_SRC_NUM);
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID)
+ insn->src[srcID] = src[srcID];
+ insn->state = this->curr;
+ insn->srcNum = srcNum;
+ insn->dstNum = 1;
+
+ // Regioning requires contiguous allocation for the sources
+ vector->regNum = srcNum;
+ vector->reg = insn->src;
+ vector->isSrc = 1;
+ }
+
+ void Selection::OBREAD(Reg dst, Reg addr, Reg header, uint32_t bti, uint32_t size) {
+ SelectionInstruction *insn = this->appendInsn();
+ insn->opcode = SEL_OP_OBREAD;
+ insn->dst[0] = dst;
+ insn->src[0] = addr;
+ insn->src[1] = header;
+ insn->state = this->curr;
+ insn->srcNum = 2;
+ insn->dstNum = 1;
+ insn->extra.function = bti;
+ insn->extra.elem = size / sizeof(int[4]); // number of owords
+ }
+
+ void Selection::OBWRITE(Reg addr, Reg value, Reg header, uint32_t bti, uint32_t size) {
+ SelectionInstruction *insn = this->appendInsn();
+ SelectionVector *vector = this->appendVector();
+ insn->opcode = SEL_OP_OBWRITE;
+ insn->src[0] = header;
+ insn->src[1] = value;
+ insn->src[2] = addr;
+ insn->state = this->curr;
+ insn->srcNum = 3;
+ insn->dstNum = 0;
+ insn->extra.function = bti;
+ insn->extra.elem = size / sizeof(int[4]); // number of owords
+
+ // We need to put the header and the data together
+ vector->regNum = 2;
+ vector->reg = insn->src;
+ vector->isSrc = 1;
+ }
+
///////////////////////////////////////////////////////////////////////////
// SimpleSelection
///////////////////////////////////////////////////////////////////////////
/*! Backward and forward branches are handled slightly differently */
void emitForwardBranch(const ir::BranchInstruction&, ir::LabelIndex dst, ir::LabelIndex src);
void emitBackwardBranch(const ir::BranchInstruction&, ir::LabelIndex dst, ir::LabelIndex src);
+
+ // Gen OCL extensions
+ void emitRegionInstruction(const ir::RegionInstruction &insn);
+ void emitVoteInstruction(const ir::VoteInstruction &insn);
+ void emitRGatherInstruction(const ir::RGatherInstruction &insn);
+ void emitOBReadInstruction(const ir::OBReadInstruction &insn);
+ void emitOBWriteInstruction(const ir::OBWriteInstruction &insn);
};
SimpleSelection::SimpleSelection(GenContext &ctx) :
this->push();
// Boolean values use scalars
- if (ctx.isScalarOrBool(insn.getDst(0)) == true) {
+ if (ctx.sel->isScalarOrBool(insn.getDst(0)) == true) {
this->curr.execWidth = 1;
this->curr.predicate = GEN_PREDICATE_NONE;
this->curr.noMask = 1;
this->pop();
}
- void SimpleSelection::emitTernaryInstruction(const ir::TernaryInstruction &insn) {
- NOT_IMPLEMENTED;
- }
void SimpleSelection::emitSelectInstruction(const ir::SelectInstruction &insn) {
+ using namespace ir;
+
+ // Get all registers for the instruction
+ const Type type = insn.getType();
+ const SelectionReg pred = this->selReg(insn.getPredicate(), TYPE_BOOL);
+ const SelectionReg dst = this->selReg(insn.getDst(0), type);
+ const SelectionReg src0 = this->selReg(insn.getSrc(SelectInstruction::src0Index), type);
+ const SelectionReg src1 = this->selReg(insn.getSrc(SelectInstruction::src1Index), type);
+
+ // Since we cannot predicate the select instruction with our current mask,
+ // we need to perform the selection in two steps (one to select, one to
+ // update the destination register)
+ const RegisterFamily family = getFamily(type);
+ const SelectionReg tmp = this->selReg(this->reg(family), type);
+ const uint32_t simdWidth = ctx.getSimdWidth();
+
+ this->push();
+ // Move the predicate into a flag register (TODO use cmp:w with blockIP)
+ this->curr.predicate = GEN_PREDICATE_NONE;
+ this->curr.execWidth = 1;
+ this->curr.noMask = 1;
+ this->MOV(SelectionReg::flag(0,1), pred);
+
+ // Perform the selection
+ this->curr.predicate = GEN_PREDICATE_NORMAL;
+ this->curr.execWidth = simdWidth;
+ this->curr.noMask = 0;
+ this->curr.flag = 0;
+ this->curr.subFlag = 1;
+ this->SEL(tmp, src0, src1);
+ this->pop();
+
+ // Update the destination register properly now
+ this->MOV(dst, tmp);
+ }
+
+ void SimpleSelection::emitTernaryInstruction(const ir::TernaryInstruction &insn) {
NOT_IMPLEMENTED;
}
void SimpleSelection::emitSampleInstruction(const ir::SampleInstruction &insn) {
using namespace ir;
const uint32_t valueNum = insn.getValueNum();
SelectionReg dst[valueNum];
-
- for (uint32_t dstID = 0; dstID < valueNum; ++dstID)
- dst[dstID] = SelectionReg::retype(this->selReg(insn.getValue(dstID)), GEN_TYPE_F);
+ for (uint32_t dstID = 0; dstID < valueNum; ++dstID)
+ dst[dstID] = SelectionReg::retype(this->selReg(insn.getValue(dstID)), GEN_TYPE_F);
this->UNTYPED_READ(addr, dst, valueNum, 0);
}
}
void SimpleSelection::emitByteGather(const ir::LoadInstruction &insn,
- SelectionReg address,
- SelectionReg value)
+ SelectionReg address,
+ SelectionReg value)
{
using namespace ir;
GBE_ASSERT(insn.getValueNum() == 1);
const SelectionReg src0 = this->selReg(insn.getSrc(0), type);
const SelectionReg src1 = this->selReg(insn.getSrc(1), type);
- // Copy the predicate to save it basically
+ // Copy the predicate to save it basically (TODO use cmp:w with blockIP)
this->push();
this->curr.noMask = 1;
this->curr.execWidth = 1;
this->MOV(dst, src);
}
-
void SimpleSelection::emitBranchInstruction(const ir::BranchInstruction &insn) {
using namespace ir;
const Opcode opcode = insn.getOpcode();
- if (opcode == OP_RET) {
-#if 0
- this->push();
- this->curr.predicate = GEN_PREDICATE_NONE;
- this->curr.execWidth = 8;
- this->curr.noMask = 1;
- this->MOV(SelectionReg::f8grf(127,0), SelectionReg::f8grf(0,0));
- this->EOT(127);
- this->pop();
-#endif
+ if (opcode == OP_RET)
this->EOT();
- } else if (opcode == OP_BRA) {
+ else if (opcode == OP_BRA) {
const LabelIndex dst = insn.getLabelIndex();
const LabelIndex src = insn.getParent()->getLabelIndex();
}
}
+ void SimpleSelection::emitRegionInstruction(const ir::RegionInstruction &insn) {
+ using namespace ir;
+
+ // Two destinations: one is the real destination, one is a temporary
+ SelectionReg dst0 = this->selReg(insn.getDst(0)), dst1;
+ if (ctx.getSimdWidth() == 8)
+ dst1 = SelectionReg::ud8grf(this->reg(FAMILY_DWORD));
+ else
+ dst1 = SelectionReg::ud16grf(this->reg(FAMILY_DWORD));
+
+ // Get all the sources
+ SelectionReg src[SelectionInstruction::MAX_SRC_NUM];
+ const uint32_t srcNum = insn.getSrcNum();
+ GBE_ASSERT(srcNum <= SelectionInstruction::MAX_SRC_NUM);
+ for (uint32_t srcID = 0; srcID < insn.getSrcNum(); ++srcID)
+ src[srcID] = this->selReg(insn.getSrc(srcID));
+
+ // Get the region parameters
+ const uint32_t offset = insn.getOffset();
+ const uint32_t vstride = insn.getVStride();
+ const uint32_t width = insn.getWidth();
+ const uint32_t hstride = insn.getHStride();
+ this->REGION(dst0, dst1, src, offset, vstride, width, hstride, srcNum);
+ }
+
+ void SimpleSelection::emitVoteInstruction(const ir::VoteInstruction &insn) {
+ using namespace ir;
+ const uint32_t simdWidth = ctx.getSimdWidth();
+ const SelectionReg dst = this->selReg(insn.getDst(0), TYPE_U16);
+ const SelectionReg src = this->selReg(insn.getSrc(0), TYPE_U16);
+
+ // Limit the vote to the active lanes
+ this->push();
+ // Move the predicate into a flag register (TODO use cmp:w with blockIP)
+ this->curr.predicate = GEN_PREDICATE_NONE;
+ this->curr.execWidth = 1;
+ this->curr.noMask = 1;
+ this->MOV(SelectionReg::flag(0,1), SelectionReg::flag(0,0));
+ this->pop();
+
+ // Emit the compare instruction to get the flag register
+ this->push();
+ const VotePredicate vote = insn.getVotePredicate();
+ const uint32_t genCmp = vote == VOTE_ANY ? GEN_CONDITIONAL_NEQ : GEN_CONDITIONAL_EQ;
+ this->curr.flag = 0;
+ this->curr.subFlag = 1;
+ this->CMP(genCmp, src, SelectionReg::immuw(0));
+ this->pop();
+
+ // Broadcast the result to the destination
+ if (vote == VOTE_ANY)
+ this->MOV(dst, SelectionReg::flag(0,1));
+ else {
+ const SelectionReg tmp = this->selReg(this->reg(FAMILY_WORD), TYPE_U16);
+ this->push();
+ // Set all lanes of tmp to zero
+ this->curr.predicate = GEN_PREDICATE_NONE;
+ this->MOV(tmp, SelectionReg::immuw(0));
+
+ // Compute the short values with no mask
+ this->curr.flag = 0;
+ this->curr.subFlag = 1;
+ this->curr.inversePredicate = 1;
+ this->curr.predicate = simdWidth == 8 ?
+ GEN_PREDICATE_ALIGN1_ANY8H :
+ GEN_PREDICATE_ALIGN1_ANY16H;
+ this->MOV(tmp, SelectionReg::immuw(1));
+ this->pop();
+
+ // Update the destination with the proper mask
+ this->MOV(dst, tmp);
+ }
+ }
+
+ void SimpleSelection::emitRGatherInstruction(const ir::RGatherInstruction &insn) {
+ using namespace ir;
+ // Two destinations: one is the real destination, one is a temporary
+ const SelectionReg dst = this->selReg(insn.getDst(0)), dst1;
+
+ // Get all the sources
+ SelectionReg src[SelectionInstruction::MAX_SRC_NUM];
+ const uint32_t srcNum = insn.getSrcNum();
+ GBE_ASSERT(srcNum <= SelectionInstruction::MAX_SRC_NUM);
+ for (uint32_t srcID = 0; srcID < insn.getSrcNum(); ++srcID)
+ src[srcID] = this->selReg(insn.getSrc(srcID));
+
+ // Get the region parameters
+ this->RGATHER(dst, src, srcNum);
+ }
+
+ void SimpleSelection::emitOBReadInstruction(const ir::OBReadInstruction &insn) {
+ using namespace ir;
+ const SelectionReg header = this->selReg(this->reg(FAMILY_DWORD), TYPE_U32);
+ const SelectionReg addr = this->selReg(insn.getAddress(), TYPE_U32);
+ const SelectionReg value = this->selReg(insn.getValue(), TYPE_U32);
+ const uint32_t simdWidth = ctx.getSimdWidth();
+ this->OBREAD(value, addr, header, 0xff, simdWidth * sizeof(int));
+ }
+
+ void SimpleSelection::emitOBWriteInstruction(const ir::OBWriteInstruction &insn) {
+ using namespace ir;
+ const SelectionReg header = this->selReg(this->reg(FAMILY_DWORD), TYPE_U32);
+ const SelectionReg addr = this->selReg(insn.getAddress(), TYPE_U32);
+ const SelectionReg value = this->selReg(insn.getValue(), TYPE_U32);
+ const uint32_t simdWidth = ctx.getSimdWidth();
+ this->OBWRITE(addr, value, header, 0xff, simdWidth * sizeof(int));
+ }
+
Selection *newSimpleSelection(GenContext &ctx) {
return GBE_NEW(SimpleSelection, ctx);
}
} /* namespace gbe */
+
/*! Instruction are chained in the block */
SelectionInstruction *prev, *next;
/*! No more than 6 sources (used by typed writes) */
- enum { MAX_SRC_NUM = 6 };
+ enum { MAX_SRC_NUM = 8 };
/*! No more than 4 destinations (used by samples and untyped reads) */
enum { MAX_DST_NUM = 4 };
/*! All destinations */
SelectionState state;
/*! Gen opcode */
uint8_t opcode;
- /*! For math and cmp instructions. Store bti for loads/stores */
- uint8_t function:4;
- /*! elemSize for byte scatters / gathers, elemNum for untyped msg */
- uint8_t elem:4;
+ union {
+ struct {
+ /*! Store bti for loads/stores and function for math and compares */
+ uint16_t function:8;
+ /*! elemSize for byte scatters / gathers, elemNum for untyped msg */
+ uint16_t elem:8;
+ };
+ struct {
+ /*! Number of sources in the tuple */
+ uint8_t width:4;
+ /*! vertical stride (0,1,2,4,8 or 16) */
+ uint16_t vstride:5;
+ /*! horizontal stride (0,1,2,4,8 or 16) */
+ uint16_t hstride:5;
+ /*! offset (0 to 7) */
+ uint16_t offset:5;
+ };
+ } extra;
/*! Number of sources */
uint8_t srcNum:4;
/*! Number of destinations */
void BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, uint32_t bti);
/*! Byte scatter (for unaligned bytes, shorts and ints) */
void BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, uint32_t bti);
+ /*! Oblock read */
+ void OBREAD(Reg dst, Reg addr, Reg header, uint32_t bti, uint32_t size);
+ /*! Oblock write */
+ void OBWRITE(Reg addr, Reg value, Reg header, uint32_t bti, uint32_t size);
/*! Extended math function */
void MATH(Reg dst, uint32_t function, Reg src0, Reg src1);
/*! Encode unary instructions */
void ALU1(uint32_t opcode, Reg dst, Reg src);
/*! Encode binary instructions */
void ALU2(uint32_t opcode, Reg dst, Reg src0, Reg src1);
+ /*! Encode regioning */
+ void REGION(Reg dst0, Reg dst1, const SelectionReg *src, uint32_t offset, uint32_t vstride, uint32_t width, uint32_t hstride, uint32_t srcNum);
+ /*! Encode regioning */
+ void RGATHER(Reg dst, const SelectionReg *src, uint32_t srcNum);
/*! Use custom allocators */
GBE_CLASS(Selection);
};
DECL_SELECTION_IR(RNDE, UnaryInstruction)
DECL_SELECTION_IR(RNDD, UnaryInstruction)
DECL_SELECTION_IR(FRC, UnaryInstruction)
-DECL_SELECTION_IR(SEL, SelectInstruction)
+DECL_SELECTION_IR(SEL, BinaryInstruction)
DECL_SELECTION_IR(AND, BinaryInstruction)
DECL_SELECTION_IR(OR, BinaryInstruction)
DECL_SELECTION_IR(XOR, BinaryInstruction)
DECL_SELECTION_IR(UNTYPED_WRITE, UntypedWriteInstruction)
DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction)
DECL_SELECTION_IR(BYTE_SCATTER, ByteScatterInstruction)
+DECL_SELECTION_IR(REGION, RegionInstruction)
+DECL_SELECTION_IR(RGATHER, RGatherInstruction)
+DECL_SELECTION_IR(OBREAD, OBReadInstruction)
+DECL_SELECTION_IR(OBWRITE, OBWriteInstruction)
const uint32_t offset = curbeOffset + subOffset;
const ir::RegisterData data = fn.getRegisterData(reg);
const ir::RegisterFamily family = data.family;
- const bool isScalar = ctx.isScalarOrBool(reg);
+ const bool isScalar = ctx.sel->isScalarOrBool(reg);
const uint32_t typeSize = isScalar ? familyScalarSize[family] : familyVectorSize[family];
const uint32_t nr = (offset + GEN_REG_SIZE) / GEN_REG_SIZE;
const uint32_t subnr = ((offset + GEN_REG_SIZE) % GEN_REG_SIZE) / typeSize;
while (this->expiringID != ending.size()) {
const GenRegInterval *toExpire = this->ending[this->expiringID];
const ir::Register reg = toExpire->reg;
- if (toExpire->maxID >= limit.minID)
+ if (toExpire->minID >= limit.maxID)
return false;
auto it = RA.find(reg);
GBE_ASSERT(it != RA.end());
INLINE Unit &getUnit(void) { return unit; }
/*! Get the current processed function */
Function &getFunction(void);
+ /*! Set the SIMD width of the function */
+ void setSimdWidth(uint32_t width) const {
+ GBE_ASSERT(width == 8 || width == 16);
+ fn->simdWidth = width;
+ }
/*! Append a new pushed constant */
void appendPushedConstant(Register reg, const PushLocation &pushed);
/*! Create a new register with the given family for the current function */
* \author Benjamin Segovia <benjamin.segovia@intel.com>
*/
#include "ir/function.hpp"
+#include "ir/unit.hpp"
#include "sys/string.hpp"
#include "sys/map.hpp"
return locationMap.find(*this)->second;
}
- Function::Function(const std::string &name, Profile profile) :
- name(name), profile(profile) { initProfile(*this); }
+ Function::Function(const std::string &name, const Unit &unit, Profile profile) :
+ name(name), unit(unit), profile(profile), simdWidth(0)
+ {
+ initProfile(*this);
+ }
Function::~Function(void) {
for (auto block : blocks) GBE_DELETE(block);
for (auto arg : args) GBE_DELETE(arg);
}
+ RegisterFamily Function::getPointerFamily(void) const {
+ return unit.getPointerFamily();
+ }
+
void Function::sortLabels(void) {
uint32_t last = 0;
/*! Commonly used in the CFG */
typedef set<BasicBlock*> BlockSet;
+ class Unit; // Function belongs to a unit
/*! Function basic blocks really belong to a function since:
* 1 - registers used in the basic blocks belongs to the function register
/*! Map of all pushed location (i.e. part of function argument) */
typedef map<PushLocation, Register> LocationMap;
/*! Create an empty function */
- Function(const std::string &name, Profile profile = PROFILE_OCL);
+ Function(const std::string &name, const Unit &unit, Profile profile = PROFILE_OCL);
/*! Release everything *including* the basic block pointers */
~Function(void);
/*! Says if this is the top basic block (entry point) */
}
/*! Get the function name */
const std::string &getName(void) const { return name; }
+ /*! Get the SIMD width (0 if not forced) */
+ uint32_t getSimdWidth(void) const { return simdWidth; }
/*! Extract the register from the register file */
INLINE RegisterData getRegisterData(Register reg) const { return file.get(reg); }
/*! Get the register family from the register itself */
LabelIndex newLabel(void);
/*! Create the control flow graph */
void computeCFG(void);
- /*! Sort the labels in increasing orders (ie top block has the smallest
- * labels)
- */
+ /*! Sort labels in increasing orders (top block has the smallest label) */
void sortLabels(void);
+ /*! Get the pointer family */
+ RegisterFamily getPointerFamily(void) const;
/*! Number of registers in the register file */
INLINE uint32_t regNum(void) const { return file.regNum(); }
/*! Number of register tuples in the register file */
private:
friend class Context; //!< Can freely modify a function
std::string name; //!< Function name
+ const Unit &unit; //!< Function belongs to this unit
vector<FunctionArgument*> args; //!< Input registers of the function
vector<Register> outputs; //!< Output registers of the function
vector<BasicBlock*> labels; //!< Each label points to a basic block
vector<BasicBlock*> blocks; //!< All chained basic blocks
RegisterFile file; //!< RegisterDatas used by the instructions
Profile profile; //!< Current function profile
- PushMap pushMap; //<! Pushed function arguments (reg->loc)
- LocationMap locationMap; //<! Pushed function arguments (loc->reg)
+ PushMap pushMap; //!< Pushed function arguments (reg->loc)
+ LocationMap locationMap; //!< Pushed function arguments (loc->reg)
+ uint32_t simdWidth; //!< 8 or 16 if forced, 0 otherwise
GBE_CLASS(Function); //!< Use gbe allocators
};
public NaryInstruction<1>
{
public:
- UnaryInstruction(Opcode opcode,
- Type type,
- Register dst,
- Register src) {
+ UnaryInstruction(Opcode opcode, Type type, Register dst, Register src) {
this->opcode = opcode;
this->type = type;
this->dst = dst;
public BasePolicy
{
public:
- TernaryInstruction(Opcode opcode,
- Type type,
- Register dst,
- Tuple src)
+ TernaryInstruction(Opcode opcode, Type type, Register dst, Tuple src)
{
this->opcode = opcode;
this->type = type;
public BasePolicy
{
public:
- SelectInstruction(Type type,
- Register dst,
- Tuple src)
+ SelectInstruction(Type type, Register dst, Tuple src)
{
this->opcode = OP_SEL;
this->type = type;
INLINE void out(std::ostream &out, const Function &fn) const;
Register predicate; //!< Predication means conditional branch
LabelIndex labelIndex; //!< Index of the label the branch targets
- bool hasPredicate:1; //!< Is it predicated?
- bool hasLabel:1; //!< Is there any target label?
+ bool hasPredicate:1; //!< Is it predicated?
+ bool hasLabel:1; //!< Is there any target label?
};
class ALIGNED_INSTRUCTION LoadInstruction :
LabelIndex labelIndex; //!< Index of the label
};
+ class ALIGNED_INSTRUCTION RegionInstruction : public BasePolicy
+ {
+ public:
+ RegionInstruction(Register dst,
+ Tuple src,
+ uint32_t srcNum,
+ uint32_t offset,
+ uint32_t vstride,
+ uint32_t width,
+ uint32_t hstride)
+ {
+ this->opcode = OP_REGION;
+ this->dst = dst;
+ this->src = src;
+ this->srcNum = srcNum;
+ this->offset = offset;
+ this->vstride = vstride;
+ this->width = width;
+ this->hstride = hstride;
+ }
+ INLINE uint32_t getOffset(void) const { return this->offset; }
+ INLINE uint32_t getVStride(void) const { return this->vstride; }
+ INLINE uint32_t getWidth(void) const { return this->width; }
+ INLINE uint32_t getHStride(void) const { return this->hstride; }
+ INLINE uint32_t getSrcNum(void) const { return this->srcNum; }
+ INLINE uint32_t getDstNum(void) const { return 1; }
+ INLINE Register getDst(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM(ID == 0, "Only one destination for the instruction");
+ return dst;
+ }
+ INLINE Register getSrc(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM(ID < this->srcNum, "Out-of-bound source register");
+ return fn.getRegister(src, ID);
+ }
+ INLINE bool wellFormed(const Function &fn, std::string &whyNot) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ uint8_t srcNum:4; //!< Number of sources in the tuple
+ uint8_t width:4; //!< width (1,2,4 or 8)
+ Register dst; //!< Dst is the register index
+ Tuple src; //!< Contiguous registers we gather data from
+ uint16_t vstride:5; //!< vertical stride (0,1,2,4,8 or 16)
+ uint16_t hstride:5; //!< horizontal stride (0,1,2,4,8 or 16)
+ uint16_t offset:5; //!< offset (0 to 7)
+ };
+
+ class ALIGNED_INSTRUCTION VoteInstruction : public BasePolicy
+ {
+ public:
+ VoteInstruction(Register dst, Register src, VotePredicate pred) {
+ this->opcode = OP_VOTE;
+ this->dst = dst;
+ this->src = src;
+ this->pred = pred;
+ }
+ INLINE uint32_t getSrcNum(void) const { return 1; }
+ INLINE uint32_t getDstNum(void) const { return 1; }
+ INLINE Register getDst(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM(ID == 0, "Out-of-bound destination register");
+ return dst;
+ }
+ INLINE Register getSrc(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM(ID == 0, "Out-of-bound source register");
+ return src;
+ }
+ INLINE VotePredicate getVotePredicate(void) const { return this->pred; }
+ INLINE bool wellFormed(const Function &fn, std::string &whyNot) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ Register dst; //!< Destination boolean
+ Register src; //!< Source boolean (n lanes internally)
+ VotePredicate pred; //!< Operation to apply on the lanes
+ };
+
+ class ALIGNED_INSTRUCTION RGatherInstruction : public BasePolicy
+ {
+ public:
+ RGatherInstruction(Register dst, Tuple src, uint32_t srcNum)
+ {
+ this->opcode = OP_RGATHER;
+ this->dst = dst;
+ this->src = src;
+ this->srcNum = srcNum;
+ }
+ INLINE uint32_t getSrcNum(void) const { return this->srcNum; }
+ INLINE uint32_t getDstNum(void) const { return 1; }
+ INLINE Register getDst(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM(ID == 0, "Only one destination for the instruction");
+ return dst;
+ }
+ INLINE Register getSrc(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM(ID < this->srcNum, "Out-of-bound source register");
+ return fn.getRegister(src, ID);
+ }
+ INLINE bool wellFormed(const Function &fn, std::string &whyNot) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ uint8_t srcNum:4; //!< Number of sources in the tuple
+ Register dst; //!< Dst is the register index
+ Tuple src; //!< Contiguous registers we gather data from
+ };
+
+ class ALIGNED_INSTRUCTION OBReadInstruction : public BasePolicy
+ {
+ public:
+ OBReadInstruction(Register value, Register address) {
+ this->opcode = OP_OBREAD;
+ this->value = value;
+ this->address = address;
+ }
+ INLINE uint32_t getSrcNum(void) const { return 1; }
+ INLINE uint32_t getDstNum(void) const { return 1; }
+ INLINE Register getDst(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM(ID == 0, "Only one destination for obread");
+ return value;
+ }
+ INLINE Register getSrc(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM(ID == 0, "Only one source for obread");
+ return address;
+ }
+ INLINE bool wellFormed(const Function &fn, std::string &whyNot) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ Register value; //!< Value to get from memory
+ Register address; //!< Address to read
+ };
+
+ class ALIGNED_INSTRUCTION OBWriteInstruction :
+ public BasePolicy, public NoDstPolicy
+ {
+ public:
+ OBWriteInstruction(Register address, Register value) {
+ this->opcode = OP_OBWRITE;
+ this->address = address;
+ this->value = value;
+ }
+ INLINE uint32_t getSrcNum(void) const { return 1; }
+ INLINE Register getSrc(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM(ID < 2, "Only two source registers for obwrite");
+ return ID == 0 ? address : value;
+ }
+ INLINE bool wellFormed(const Function &fn, std::string &whyNot) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ Register address; //!< Address to write to
+ Register value; //!< Value to write
+ };
+
#undef ALIGNED_INSTRUCTION
/////////////////////////////////////////////////////////////////////////
}
// The label must exist and the register must of boolean family
- INLINE bool BranchInstruction::wellFormed(const Function &fn, std::string &whyNot) const
- {
+ INLINE bool BranchInstruction::wellFormed(const Function &fn, std::string &whyNot) const {
if (hasLabel)
if (UNLIKELY(labelIndex >= fn.labelNum())) {
whyNot = "Out-of-bound label index";
return false;
return true;
}
+
+ // Stride is 1,2,4,8 offset goes from 0 to 15 and registers must be dwords
+ INLINE bool RegionInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ if (UNLIKELY(vstride != 0 && vstride != 1 && vstride != 2 && vstride != 4 && vstride != 8 && vstride != 16)) {
+ whyNot = "Invalid vertical stride (must be 0, 1, 2, 4 or 8)";
+ return false;
+ }
+ if (UNLIKELY(hstride != 0 && hstride != 1 && hstride != 2 && hstride != 4 && hstride != 8 && hstride != 16)) {
+ whyNot = "Invalid horizontal stride (must be 0, 1, 2, 4 or 8)";
+ return false;
+ }
+ if (UNLIKELY(width != 0 && width != 1 && width != 2 && width != 4 && width != 8)) {
+ whyNot = "Invalid width (must be 1, 2, 4 or 8)";
+ return false;
+ }
+ if (UNLIKELY(offset > 7)) {
+ whyNot = "Invalid offset (must be smaller than 8)";
+ return false;
+ }
+ if (UNLIKELY(checkRegisterData(FAMILY_DWORD, dst, fn, whyNot) == false))
+ return false;
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ const Register regID = fn.getRegister(src, srcID);
+ if (UNLIKELY(checkRegisterData(FAMILY_DWORD, regID, fn, whyNot) == false))
+ return false;
+ }
+ return true;
+ }
+
+ // Boolean values for both source and destination
+ INLINE bool VoteInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ if (UNLIKELY(checkRegisterData(FAMILY_WORD, dst, fn, whyNot) == false))
+ return false;
+ if (UNLIKELY(checkRegisterData(FAMILY_WORD, src, fn, whyNot) == false))
+ return false;
+ return true;
+ }
+
+ // Indices are always int16 and the rest is 32 bit integers
+ INLINE bool RGatherInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ if (UNLIKELY(checkRegisterData(FAMILY_DWORD, dst, fn, whyNot) == false))
+ return false;
+ if (UNLIKELY(checkRegisterData(FAMILY_WORD, fn.getRegister(src, 0), fn, whyNot) == false))
+ return false;
+ for (uint32_t srcID = 1; srcID < srcNum; ++srcID) {
+ const Register regID = fn.getRegister(src, srcID);
+ if (UNLIKELY(checkRegisterData(FAMILY_DWORD, regID, fn, whyNot) == false))
+ return false;
+ }
+ return true;
+ }
+
+ // Source is an address. Destination is a 32 bit integer
+ INLINE bool OBReadInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ const RegisterFamily ptrFamily = fn.getPointerFamily();
+ if (UNLIKELY(checkRegisterData(ptrFamily, address, fn, whyNot) == false))
+ return false;
+ if (UNLIKELY(checkRegisterData(FAMILY_DWORD, value, fn, whyNot) == false))
+ return false;
+ return true;
+ }
+
+ // First source is the address. Second source is the value to write
+ INLINE bool OBWriteInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ const RegisterFamily ptrFamily = fn.getPointerFamily();
+ if (UNLIKELY(checkRegisterData(ptrFamily, address, fn, whyNot) == false))
+ return false;
+ if (UNLIKELY(checkRegisterData(FAMILY_DWORD, value, fn, whyNot) == false))
+ return false;
+ return true;
+ }
+
#undef CHECK_TYPE
/////////////////////////////////////////////////////////////////////////
out << " %" << this->getDst(fn,0) << " ";
fn.outImmediate(out, immediateIndex);
}
+
+ INLINE void RegionInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ out << "<" << uint32_t(vstride) << ";"
+ << uint32_t(width) << "," << uint32_t(hstride)
+ << ">." << uint32_t(offset) << " ";
+ out << "%" << this->getDst(fn, 0) << " ";
+ for (uint32_t i = 0; i < this->getSrcNum(); ++i)
+ out << "%" << this->getSrc(fn, i) << (i != (srcNum-1u) ? " " : "");
+ }
+
+ INLINE void VoteInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ out << "." << (this->getVotePredicate() == VOTE_ALL ? "all" : "any")
+ << " %" << this->getDst(fn, 0)
+ << " %" << this->getSrc(fn, 0);
+ }
+
+ INLINE void RGatherInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ out << " %" << this->getDst(fn, 0);
+ for (uint32_t i = 0; i < this->getSrcNum(); ++i)
+ out << " %" << this->getSrc(fn, i);
+ }
+
+ INLINE void OBReadInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ out << " %" << this->getDst(fn, 0);
+ out << " %" << this->getSrc(fn, 0);
+ }
+
+ INLINE void OBWriteInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ out << " %" << this->getSrc(fn, 0);
+ out << " %" << this->getSrc(fn, 1);
+ }
+
} /* namespace internal */
std::ostream &operator<< (std::ostream &out, AddressSpace addrSpace) {
#include "ir/instruction.hxx"
END_INTROSPECTION(LabelInstruction)
+START_INTROSPECTION(RegionInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(RegionInstruction)
+
+START_INTROSPECTION(VoteInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(VoteInstruction)
+
+START_INTROSPECTION(RGatherInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(RGatherInstruction)
+
+START_INTROSPECTION(OBReadInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(OBReadInstruction)
+
+START_INTROSPECTION(OBWriteInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(OBWriteInstruction)
+
#undef END_INTROSPECTION
#undef START_INTROSPECTION
#undef DECL_INSN
DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated())
DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
+DECL_MEM_FN(RegionInstruction, uint32_t, getOffset(void), getOffset())
+DECL_MEM_FN(RegionInstruction, uint32_t, getVStride(void), getVStride())
+DECL_MEM_FN(RegionInstruction, uint32_t, getWidth(void), getWidth())
+DECL_MEM_FN(RegionInstruction, uint32_t, getHStride(void), getHStride())
+DECL_MEM_FN(VoteInstruction, VotePredicate, getVotePredicate(void), getVotePredicate())
#undef DECL_MEM_FN
// All unary functions
#define DECL_EMIT_FUNCTION(NAME) \
Instruction NAME(Type type, Register dst, Register src) { \
- const internal::UnaryInstruction insn(OP_##NAME, type, dst, src); \
- return insn.convert(); \
+ return internal::UnaryInstruction(OP_##NAME, type, dst, src).convert(); \
}
DECL_EMIT_FUNCTION(MOV)
// All binary functions
#define DECL_EMIT_FUNCTION(NAME) \
Instruction NAME(Type type, Register dst, Register src0, Register src1) { \
- const internal::BinaryInstruction insn(OP_##NAME, type, dst, src0, src1); \
- return insn.convert(); \
+ return internal::BinaryInstruction(OP_##NAME, type, dst, src0, src1).convert(); \
}
DECL_EMIT_FUNCTION(MUL)
// MAD
Instruction MAD(Type type, Register dst, Tuple src) {
- internal::TernaryInstruction insn(OP_MAD, type, dst, src);
- return insn.convert();
+ return internal::TernaryInstruction(OP_MAD, type, dst, src).convert();
}
// SEL
Instruction SEL(Type type, Register dst, Tuple src) {
- internal::SelectInstruction insn(type, dst, src);
- return insn.convert();
+ return internal::SelectInstruction(type, dst, src).convert();
}
// All compare functions
// CVT
Instruction CVT(Type dstType, Type srcType, Register dst, Register src) {
- const internal::ConvertInstruction insn(dstType, srcType, dst, src);
- return insn.convert();
+ return internal::ConvertInstruction(dstType, srcType, dst, src).convert();
}
// BRA
Instruction BRA(LabelIndex labelIndex) {
- const internal::BranchInstruction insn(OP_BRA, labelIndex);
- return insn.convert();
+ return internal::BranchInstruction(OP_BRA, labelIndex).convert();
}
Instruction BRA(LabelIndex labelIndex, Register pred) {
- const internal::BranchInstruction insn(OP_BRA, labelIndex, pred);
- return insn.convert();
+ return internal::BranchInstruction(OP_BRA, labelIndex, pred).convert();
}
// RET
Instruction RET(void) {
- const internal::BranchInstruction insn(OP_RET);
- return insn.convert();
+ return internal::BranchInstruction(OP_RET).convert();
}
// LOADI
Instruction LOADI(Type type, Register dst, ImmediateIndex value) {
- const internal::LoadImmInstruction insn(type, dst, value);
- return insn.convert();
+ return internal::LoadImmInstruction(type, dst, value).convert();
}
// LOAD and STORE
uint32_t valueNum, \
bool dwAligned) \
{ \
- const internal::CLASS insn(type,tuple,offset,space,valueNum,dwAligned); \
- return insn.convert(); \
+ return internal::CLASS(type,tuple,offset,space,valueNum,dwAligned).convert(); \
}
DECL_EMIT_FUNCTION(LOAD, LoadInstruction)
// FENCE
Instruction FENCE(AddressSpace space) {
- const internal::FenceInstruction insn(space);
- return insn.convert();
+ return internal::FenceInstruction(space).convert();
}
// LABEL
Instruction LABEL(LabelIndex labelIndex) {
- const internal::LabelInstruction insn(labelIndex);
- return insn.convert();
+ return internal::LabelInstruction(labelIndex).convert();
+ }
+
+ // REGION
+ Instruction REGION(uint32_t offset, uint32_t vstride, uint32_t width, uint32_t hstride, Register dst, Tuple src, uint32_t srcNum) {
+ return internal::RegionInstruction(dst, src, srcNum, offset, vstride, width, hstride).convert();
+ }
+
+ // VOTE
+ Instruction VOTE(VotePredicate pred, Register dst, Register src) {
+ return internal::VoteInstruction(dst, src, pred).convert();
+ }
+
+ // RGATHER
+ Instruction RGATHER(Register dst, Tuple src, uint32_t srcNum) {
+ return internal::RGatherInstruction(dst, src, srcNum).convert();
+ }
+
+ // OBREAD
+ Instruction OBREAD(Register dst, Register address) {
+ return internal::OBReadInstruction(dst, address).convert();
+ }
+
+ // OBWRITE
+ Instruction OBWRITE(Register address, Register value) {
+ return internal::OBWriteInstruction(address, value).convert();
}
std::ostream &operator<< (std::ostream &out, const Instruction &insn) {
MEM_PRIVATE //!< Per thread private memory
};
+ /* Vote function per hardware thread */
+ enum VotePredicate : uint8_t {
+ VOTE_ALL = 0,
+ VOTE_ANY
+ };
+
/*! Output the memory space */
std::ostream &operator<< (std::ostream &out, AddressSpace addrSpace);
*/
class SelectInstruction : public Instruction {
public:
+ /*! Predicate is in slot 0. So first source to selec is in slot 1 */
+ static const uint32_t src0Index = 1;
+ /*! Second source to select is in slot 2 */
+ static const uint32_t src1Index = 2;
+ /*! Get the predicate of the selection instruction */
+ INLINE Register getPredicate(void) const { return this->getSrc(0); }
/*! Get the type of both sources */
Type getType(void) const;
/*! Return true if the given instruction is an instance of this class */
static bool isClassOf(const Instruction &insn);
};
+ /*! Register region instructions are specific to OpenCL Gen and allow to
+ * manipulate the register file and to do cross lane shuffles (Gen extension)
+ */
+ class RegionInstruction : public Instruction {
+ public:
+ /*! Return the offset index (0..7) for the strided load*/
+ uint32_t getOffset(void) const;
+ /*! Return the vertical stride (0,1,2,4,8) */
+ uint32_t getVStride(void) const;
+ /*! Return the width (0,1,2,4,8) */
+ uint32_t getWidth(void) const;
+ /*! Return the horizontal stride (0,1,2,4,8) */
+ uint32_t getHStride(void) const;
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! Vote instruction that operates accross lanes from the same hardware
+ * thread (Gen extension)
+ */
+ class VoteInstruction : public Instruction {
+ public:
+ /*! Return the vote predicate */
+ VotePredicate getVotePredicate(void) const;
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! Gather from register file instruction. Similar to register region but with
+ * indirect addressing (Gen extension)
+ */
+ class RGatherInstruction : public Instruction {
+ public:
+ /*!< Source ID for the indices */
+ static const uint32_t indexID = 0;
+ /*! Get the indices for the gather */
+ INLINE Register getIndices(void) const { return this->getSrc(indexID); }
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! OBlock read. Only the first lane is considered for the address
+ * (Gen extension)
+ */
+ class OBReadInstruction : public Instruction {
+ public:
+ /*! Get the address register */
+ INLINE Register getAddress(void) const { return this->getSrc(0); }
+ /*! Get the value (i.e. destination here) */
+ INLINE Register getValue(void) const { return this->getDst(0); }
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
+ /*! OBlock write. Only the first lane is considered for the address
+ * (Gen extension)
+ */
+ class OBWriteInstruction : public Instruction {
+ public:
+ /*! Get the address register */
+ INLINE Register getAddress(void) const { return this->getSrc(0); }
+ /*! Get the value to write */
+ INLINE Register getValue(void) const { return this->getSrc(1); }
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
/*! Specialize the instruction. Also performs typechecking first based on the
* opcode. Crashes if it fails
*/
Instruction FENCE(AddressSpace space);
/*! label labelIndex */
Instruction LABEL(LabelIndex labelIndex);
+ /*! region.offset.stride dst {src1,...,src_srcNum} */
+ Instruction REGION(uint32_t offset, uint32_t vstride, uint32_t width, uint32_t hstride, Register dst, Tuple src, uint32_t srcNum);
+ /*! vote.predcate dst src */
+ Instruction VOTE(VotePredicate predicate, Register dst, Register src);
+ /*! rgather dst index {src...} (tuple contains index and sources) */
+ Instruction RGATHER(Register dst, Tuple tuple, uint32_t srcNum);
+ /*! obread dst address */
+ Instruction OBREAD(Register dst, Register address);
+ /*! obwrite address data */
+ Instruction OBWRITE(Register address, Register value);
} /* namespace ir */
} /* namespace gbe */
DECL_INSN(FENCE, FenceInstruction)
DECL_INSN(LABEL, LabelInstruction)
+// OpenCL Gen extensions
+DECL_INSN(REGION, RegionInstruction)
+DECL_INSN(VOTE, VoteInstruction)
+DECL_INSN(RGATHER, RGatherInstruction)
+DECL_INSN(OBREAD, OBReadInstruction)
+DECL_INSN(OBWRITE, OBWriteInstruction)
+
namespace gbe {
namespace ir {
+ /*! Defines the size of the pointers. All the functions from the unit will
+ * use the same pointer size as the unit they belong to
+ */
+ enum PointerSize {
+ POINTER_32_BITS = 32,
+ POINTER_64_BITS = 64
+ };
+
/*! Basically provides the size of the register */
enum RegisterFamily : uint8_t {
FAMILY_BOOL = 0,
auto it = functions.find(name);
if (it != functions.end())
return NULL;
- Function *fn = GBE_NEW(Function, name);
+ Function *fn = GBE_NEW(Function, name, *this);
functions[name] = fn;
return fn;
}
// A unit contains a set of functions
class Function;
- /*! Defines the size of the pointers. All the functions from the unit will
- * use the same pointer size as the unit they belong to
- */
- enum PointerSize {
- POINTER_32_BITS = 32,
- POINTER_64_BITS = 64
- };
-
/*! Complete unit of compilation. It contains a set of functions and a set of
* constant the functions may refer to.
*/
/*! Build the intrinsic hash map */
OCLIntrinsicMap(void) {
#define DECL_LLVM_GEN_FUNCTION(ID, NAME) \
- map.insert(std::make_pair("__gen_ocl_"#NAME, GEN_OCL_##ID));
+ map.insert(std::make_pair(#NAME, GEN_OCL_##ID));
#include "llvm_gen_ocl_function.hxx"
#undef DECL_LLVM_GEN_FUNCTION
}
}
}
- // With OCL there is no side effect for any called functions. So do nothing
- // when there is no returned value
- if (I.getType() == Type::getVoidTy(I.getContext()))
- NOT_SUPPORTED;
-
// Get the name of the called function and handle it
const std::string fnName = Callee->getName();
auto it = instrinsicMap.map.find(fnName);
regTranslator.newScalarProxy(ir::ocl::goffset1, dst); break;
case GEN_OCL_GET_GLOBAL_OFFSET2:
regTranslator.newScalarProxy(ir::ocl::goffset2, dst); break;
+ case GEN_OCL_OBREAD:
+ case GEN_OCL_REGION1:
+ case GEN_OCL_REGION2:
+ case GEN_OCL_REGION3:
+ case GEN_OCL_REGION4:
+ case GEN_OCL_REGION5:
+ case GEN_OCL_REGION6:
+ case GEN_OCL_REGION7:
+ case GEN_OCL_REGION8:
+ case GEN_OCL_RGATHER1:
+ case GEN_OCL_RGATHER2:
+ case GEN_OCL_RGATHER3:
+ case GEN_OCL_RGATHER4:
+ case GEN_OCL_RGATHER5:
+ case GEN_OCL_RGATHER6:
+ case GEN_OCL_RGATHER7:
+ case GEN_OCL_RGATHER8:
+ case GEN_OCL_ALL:
+ case GEN_OCL_ANY:
+ // No structure can be returned
+ GBE_ASSERT(I.hasStructRetAttr() == false);
+ this->newRegister(&I);
+ break;
+ case GEN_OCL_OBWRITE:
+ case GEN_OCL_FORCE_SIMD8:
+ case GEN_OCL_FORCE_SIMD16:
+ break;
default: NOT_SUPPORTED;
};
-#if 0
- if (fnName == "__gen_ocl_get_group_id0")
- regTranslator.newScalarProxy(ir::ocl::groupid0, dst);
- else if (fnName == "__gen_ocl_get_group_id1")
- regTranslator.newScalarProxy(ir::ocl::groupid1, dst);
- else if (fnName == "__gen_ocl_get_group_id2")
- regTranslator.newScalarProxy(ir::ocl::groupid2, dst);
- else if (fnName == "__gen_ocl_get_local_id0")
- regTranslator.newScalarProxy(ir::ocl::lid0, dst);
- else if (fnName == "__gen_ocl_get_local_id1")
- regTranslator.newScalarProxy(ir::ocl::lid1, dst);
- else if (fnName == "__gen_ocl_get_local_id2")
- regTranslator.newScalarProxy(ir::ocl::lid2, dst);
- else if (fnName == "__gen_ocl_get_num_groups0")
- regTranslator.newScalarProxy(ir::ocl::numgroup0, dst);
- else if (fnName == "__gen_ocl_get_num_groups1")
- regTranslator.newScalarProxy(ir::ocl::numgroup1, dst);
- else if (fnName == "__gen_ocl_get_num_groups2")
- regTranslator.newScalarProxy(ir::ocl::numgroup2, dst);
- else if (fnName == "__gen_ocl_get_local_size0")
- regTranslator.newScalarProxy(ir::ocl::lsize0, dst);
- else if (fnName == "__gen_ocl_get_local_size1")
- regTranslator.newScalarProxy(ir::ocl::lsize1, dst);
- else if (fnName == "__gen_ocl_get_local_size2")
- regTranslator.newScalarProxy(ir::ocl::lsize2, dst);
- else if (fnName == "__gen_ocl_get_global_size0")
- regTranslator.newScalarProxy(ir::ocl::gsize0, dst);
- else if (fnName == "__gen_ocl_get_global_size1")
- regTranslator.newScalarProxy(ir::ocl::gsize1, dst);
- else if (fnName == "__gen_ocl_get_global_size2")
- regTranslator.newScalarProxy(ir::ocl::gsize2, dst);
- else if (fnName == "__gen_ocl_get_global_offset0")
- regTranslator.newScalarProxy(ir::ocl::goffset0, dst);
- else if (fnName == "__gen_ocl_get_global_offset1")
- regTranslator.newScalarProxy(ir::ocl::goffset1, dst);
- else if (fnName == "__gen_ocl_get_global_offset2")
- regTranslator.newScalarProxy(ir::ocl::goffset2, dst);
- else
- NOT_SUPPORTED;
-#endif
}
+ struct U64CPVExtractFunctor {
+ U64CPVExtractFunctor(ir::Context &ctx) : ctx(ctx) {}
+ template <typename T> INLINE uint64_t operator() (const T &t) {
+ return uint64_t(t);
+ }
+ ir::Context &ctx;
+ };
+
void GenWriter::emitCallInst(CallInst &I) {
if (Function *F = I.getCalledFunction()) {
if (F->getIntrinsicID() != 0) {
break;
default: NOT_IMPLEMENTED;
}
+ } else {
+ // Get the name of the called function and handle it
+ Value *Callee = I.getCalledValue();
+ const std::string fnName = Callee->getName();
+ auto it = instrinsicMap.map.find(fnName);
+ GBE_ASSERT(it != instrinsicMap.map.end());
+
+ // Get the function arguments
+ CallSite CS(&I);
+ CallSite::arg_iterator AI = CS.arg_begin();
+#if GBE_DEBUG
+ CallSite::arg_iterator AE = CS.arg_end();
+#endif /* GBE_DEBUG */
+
+
+ switch (it->second) {
+ case GEN_OCL_REGION1:
+ case GEN_OCL_REGION2:
+ case GEN_OCL_REGION3:
+ case GEN_OCL_REGION4:
+ case GEN_OCL_REGION5:
+ case GEN_OCL_REGION6:
+ case GEN_OCL_REGION7:
+ case GEN_OCL_REGION8:
+ {
+ // Get region offset
+ GBE_ASSERT(AI != AE);
+ Constant *CPV = dyn_cast<Constant>(*AI);
+ GBE_ASSERTM(CPV != NULL, "offset for register region must be constant");
+ const uint32_t offset = processConstant<uint32_t>(CPV, U64CPVExtractFunctor(ctx));
+ ++AI;
+
+ // Get region vertical stride
+ GBE_ASSERT(AI != AE);
+ CPV = dyn_cast<Constant>(*AI);
+ GBE_ASSERTM(CPV != NULL, "vstride for register region must be constant");
+ const uint32_t vstride = processConstant<uint32_t>(CPV, U64CPVExtractFunctor(ctx));
+ ++AI;
+
+ // Get region width
+ GBE_ASSERT(AI != AE);
+ CPV = dyn_cast<Constant>(*AI);
+ GBE_ASSERTM(CPV != NULL, "width for register region must be constant");
+ const uint32_t width = processConstant<uint32_t>(CPV, U64CPVExtractFunctor(ctx));
+ ++AI;
+
+ // Get region horizontal stride
+ GBE_ASSERT(AI != AE);
+ CPV = dyn_cast<Constant>(*AI);
+ GBE_ASSERTM(CPV != NULL, "vstride for register region must be constant");
+ const uint32_t hstride = processConstant<uint32_t>(CPV, U64CPVExtractFunctor(ctx));
+ ++AI;
+
+ // Build the tuple data for the sources and the destination register
+ const uint32_t srcNum = uint32_t(it->second) - GEN_OCL_REGION1 + 1;
+ vector<ir::Register> tupleData; // put registers here
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ GBE_ASSERT(AI != AE);
+ const ir::Register reg = this->getRegister(*AI);
+ tupleData.push_back(reg);
+ ++AI;
+ }
+ GBE_ASSERT(AI == AE);
+ const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], srcNum);
+ const ir::Register dst = this->getRegister(&I);
+ ctx.REGION(offset, vstride, width, hstride, dst, tuple, srcNum);
+ break;
+ }
+ case GEN_OCL_RGATHER1:
+ case GEN_OCL_RGATHER2:
+ case GEN_OCL_RGATHER3:
+ case GEN_OCL_RGATHER4:
+ case GEN_OCL_RGATHER5:
+ case GEN_OCL_RGATHER6:
+ case GEN_OCL_RGATHER7:
+ case GEN_OCL_RGATHER8:
+ {
+ // Build the tuple data for the sources and the destination register
+ const uint32_t srcNum = uint32_t(it->second) - GEN_OCL_RGATHER1 + 2;
+ vector<ir::Register> tupleData; // put registers here
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ GBE_ASSERT(AI != AE);
+ const ir::Register reg = this->getRegister(*AI);
+ tupleData.push_back(reg);
+ ++AI;
+ }
+ GBE_ASSERT(AI == AE);
+ const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], srcNum);
+ const ir::Register dst = this->getRegister(&I);
+ ctx.RGATHER(dst, tuple, srcNum);
+ break;
+ }
+ case GEN_OCL_ALL:
+ case GEN_OCL_ANY:
+ {
+ GBE_ASSERT(AI != AE);
+ const ir::Register src = this->getRegister(*AI);
+ const ir::Register dst = this->getRegister(&I);
+ const ir::VotePredicate pred = it->second == GEN_OCL_ANY ? ir::VOTE_ANY : ir::VOTE_ALL;
+ ctx.VOTE(pred, dst, src);
+ break;
+ }
+ case GEN_OCL_OBREAD:
+ {
+ GBE_ASSERT(AI != AE);
+ const ir::Register dst = this->getRegister(&I);
+ const ir::Register src = this->getRegister(*AI);
+ ctx.OBREAD(dst, src);
+ break;
+ }
+ case GEN_OCL_OBWRITE:
+ {
+ GBE_ASSERT(AI != AE);
+ const ir::Register address = this->getRegister(*AI); ++AI;
+ GBE_ASSERT(AI != AE);
+ const ir::Register value = this->getRegister(*AI);
+ ctx.OBWRITE(address, value);
+ break;
+ }
+ case GEN_OCL_FORCE_SIMD8: ctx.setSimdWidth(8); break;
+ case GEN_OCL_FORCE_SIMD16: ctx.setSimdWidth(16); break;
+ default:
+ break;
+ }
}
}
}
- struct AllocaSizeFunctor
- {
- AllocaSizeFunctor(ir::Context &ctx) : ctx(ctx) {}
- template <typename T> INLINE uint64_t operator() (const T &t) {
- return uint64_t(t);
- }
- ir::Context &ctx;
- };
-
-
void GenWriter::regAllocateAllocaInst(AllocaInst &I) {
this->newRegister(&I);
}
else {
Constant *CPV = dyn_cast<Constant>(src);
if (CPV) {
- const uint64_t elemNum = processConstant<uint64_t>(CPV, AllocaSizeFunctor(ctx));
+ const uint64_t elemNum = processConstant<uint64_t>(CPV, U64CPVExtractFunctor(ctx));
ir::Immediate imm = ctx.getImmediate(immIndex);
imm.data.u64 = ALIGN(imm.data.u64 * elemNum, 4);
ctx.setImmediate(immIndex, imm);
-DECL_LLVM_GEN_FUNCTION(GET_GROUP_ID0, get_group_id0)
-DECL_LLVM_GEN_FUNCTION(GET_GROUP_ID1, get_group_id1)
-DECL_LLVM_GEN_FUNCTION(GET_GROUP_ID2, get_group_id2)
-DECL_LLVM_GEN_FUNCTION(GET_LOCAL_ID0, get_local_id0)
-DECL_LLVM_GEN_FUNCTION(GET_LOCAL_ID1, get_local_id1)
-DECL_LLVM_GEN_FUNCTION(GET_LOCAL_ID2, get_local_id2)
-DECL_LLVM_GEN_FUNCTION(GET_NUM_GROUPS0, get_num_groups0)
-DECL_LLVM_GEN_FUNCTION(GET_NUM_GROUPS1, get_num_groups1)
-DECL_LLVM_GEN_FUNCTION(GET_NUM_GROUPS2, get_num_groups2)
-DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE0, get_local_size0)
-DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE1, get_local_size1)
-DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE2, get_local_size2)
-DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE0, get_global_size0)
-DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE1, get_global_size1)
-DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE2, get_global_size2)
-DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_OFFSET0, get_global_offset0)
-DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_OFFSET1, get_global_offset1)
-DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_OFFSET2, get_global_offset2)
+DECL_LLVM_GEN_FUNCTION(GET_GROUP_ID0, __gen_ocl_get_group_id0)
+DECL_LLVM_GEN_FUNCTION(GET_GROUP_ID1, __gen_ocl_get_group_id1)
+DECL_LLVM_GEN_FUNCTION(GET_GROUP_ID2, __gen_ocl_get_group_id2)
+DECL_LLVM_GEN_FUNCTION(GET_LOCAL_ID0, __gen_ocl_get_local_id0)
+DECL_LLVM_GEN_FUNCTION(GET_LOCAL_ID1, __gen_ocl_get_local_id1)
+DECL_LLVM_GEN_FUNCTION(GET_LOCAL_ID2, __gen_ocl_get_local_id2)
+DECL_LLVM_GEN_FUNCTION(GET_NUM_GROUPS0, __gen_ocl_get_num_groups0)
+DECL_LLVM_GEN_FUNCTION(GET_NUM_GROUPS1, __gen_ocl_get_num_groups1)
+DECL_LLVM_GEN_FUNCTION(GET_NUM_GROUPS2, __gen_ocl_get_num_groups2)
+DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE0, __gen_ocl_get_local_size0)
+DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE1, __gen_ocl_get_local_size1)
+DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE2, __gen_ocl_get_local_size2)
+DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE0, __gen_ocl_get_global_size0)
+DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE1, __gen_ocl_get_global_size1)
+DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE2, __gen_ocl_get_global_size2)
+DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_OFFSET0, __gen_ocl_get_global_offset0)
+DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_OFFSET1, __gen_ocl_get_global_offset1)
+DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_OFFSET2, __gen_ocl_get_global_offset2)
+
+// Register regions
+DECL_LLVM_GEN_FUNCTION(REGION1, _Z16__gen_ocl_regioniiiii) // order matters here!
+DECL_LLVM_GEN_FUNCTION(REGION2, _Z16__gen_ocl_regioniiiiii)
+DECL_LLVM_GEN_FUNCTION(REGION3, _Z16__gen_ocl_regioniiiiiii)
+DECL_LLVM_GEN_FUNCTION(REGION4, _Z16__gen_ocl_regioniiiiiiii)
+DECL_LLVM_GEN_FUNCTION(REGION5, _Z16__gen_ocl_regioniiiiiiiii)
+DECL_LLVM_GEN_FUNCTION(REGION6, _Z16__gen_ocl_regioniiiiiiiiii)
+DECL_LLVM_GEN_FUNCTION(REGION7, _Z16__gen_ocl_regioniiiiiiiiiii)
+DECL_LLVM_GEN_FUNCTION(REGION8, _Z16__gen_ocl_regioniiiiiiiiiiii)
+
+// Gather from register file
+DECL_LLVM_GEN_FUNCTION(RGATHER1, _Z17__gen_ocl_rgatherti) // order matters here!
+DECL_LLVM_GEN_FUNCTION(RGATHER2, _Z17__gen_ocl_rgathertii)
+DECL_LLVM_GEN_FUNCTION(RGATHER3, _Z17__gen_ocl_rgathertiii)
+DECL_LLVM_GEN_FUNCTION(RGATHER4, _Z17__gen_ocl_rgathertiiii)
+DECL_LLVM_GEN_FUNCTION(RGATHER5, _Z17__gen_ocl_rgathertiiiii)
+DECL_LLVM_GEN_FUNCTION(RGATHER6, _Z17__gen_ocl_rgathertiiiiii)
+DECL_LLVM_GEN_FUNCTION(RGATHER7, _Z17__gen_ocl_rgathertiiiiiii)
+DECL_LLVM_GEN_FUNCTION(RGATHER8, _Z17__gen_ocl_rgathertiiiiiiii)
+
+// Uniform conditions
+DECL_LLVM_GEN_FUNCTION(ALL, _Z13__gen_ocl_allt)
+DECL_LLVM_GEN_FUNCTION(ANY, _Z13__gen_ocl_anyt)
+
+// OBlock reads / writes
+DECL_LLVM_GEN_FUNCTION(OBREAD, _Z16__gen_ocl_obreadPKU3AS1v)
+DECL_LLVM_GEN_FUNCTION(OBWRITE, _Z17__gen_ocl_obwritePKU3AS1vi)
+
+// To force SIMD8/16 compilation
+DECL_LLVM_GEN_FUNCTION(FORCE_SIMD8, __gen_ocl_force_simd8)
+DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16)
DECL_SELECT4(float4, float, int4, 0x80000000)
#undef DECL_SELECT4
-__attribute__((overloadable,always_inline))
-inline float2 mad(float2 a, float2 b, float2 c) {
+INLINE_OVERLOADABLE inline float2 mad(float2 a, float2 b, float2 c) {
return (float2)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y));
}
-__attribute__((overloadable,always_inline))
-inline float3 mad(float3 a, float3 b, float3 c) {
+INLINE_OVERLOADABLE inline float3 mad(float3 a, float3 b, float3 c) {
return (float3)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y), mad(a.z,b.z,c.z));
}
-__attribute__((overloadable,always_inline))
-inline float4 mad(float4 a, float4 b, float4 c) {
+INLINE_OVERLOADABLE inline float4 mad(float4 a, float4 b, float4 c) {
return (float4)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y),
mad(a.z,b.z,c.z), mad(a.w,b.w,c.w));
}
+/////////////////////////////////////////////////////////////////////////////
+// Extensions to manipulate the register file
+/////////////////////////////////////////////////////////////////////////////
+
+// Direct addressing register regions
+OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int);
+OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int);
+OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int);
+OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int);
+OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int);
+OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int, int);
+OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int, int, int);
+OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int, int, int, int);
+
+// Gather from register file
+OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int);
+OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int);
+OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int);
+OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int);
+OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int);
+OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int, int);
+OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int, int, int);
+OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int, int, int, int);
+
+/////////////////////////////////////////////////////////////////////////////
+// Extension to have uniform condition per hardware thread
+/////////////////////////////////////////////////////////////////////////////
+
+OVERLOADABLE unsigned short __gen_ocl_any(unsigned short cond);
+OVERLOADABLE unsigned short __gen_ocl_all(unsigned short cond);
+
+/////////////////////////////////////////////////////////////////////////////
+// Extension to support OBlock reads / writes
+/////////////////////////////////////////////////////////////////////////////
+
+OVERLOADABLE int __gen_ocl_obread(const __global void *address);
+OVERLOADABLE int __gen_ocl_obread(const __constant void *address);
+OVERLOADABLE int __gen_ocl_obread(const __local void *address);
+OVERLOADABLE void __gen_ocl_obwrite(const __global void *address, int);
+OVERLOADABLE void __gen_ocl_obwrite(const __local void *address, int);
+
+/////////////////////////////////////////////////////////////////////////////
+// Force the compilation to SIMD8 or SIMD16
+/////////////////////////////////////////////////////////////////////////////
+
+int __gen_ocl_force_simd8(void);
+int __gen_ocl_force_simd16(void);
+
+#define DECL_VOTE(TYPE) \
+__attribute__((overloadable,always_inline)) \
+TYPE __gen_ocl_any(TYPE cond) { \
+ return (TYPE) __gen_ocl_any((unsigned short) cond); \
+} \
+__attribute__((overloadable,always_inline)) \
+TYPE __gen_ocl_all(TYPE cond) { \
+ return (TYPE) __gen_ocl_all((unsigned short) cond); \
+}
+DECL_VOTE(unsigned int)
+DECL_VOTE(unsigned char)
+DECL_VOTE(int)
+DECL_VOTE(char)
+DECL_VOTE(short)
+DECL_VOTE(bool)
+#undef DECL_VOTE
#define NULL ((void*)0)
+#undef INLINE_OVERLOADABLE
+
#include "string"
namespace gbe {
std::string ocl_stdlib_str =
-"/* \n"
-" * Copyright © 2012 Intel Corporation\n"
-" *\n"
-" * This library is free software; you can redistribute it and/or\n"
-" * modify it under the terms of the GNU Lesser General Public\n"
-" * License as published by the Free Software Foundation; either\n"
-" * version 2 of the License, or (at your option) any later version.\n"
-" *\n"
-" * This library is distributed in the hope that it will be useful,\n"
-" * but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
-" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
-" * Lesser General Public License for more details.\n"
-" *\n"
-" * You should have received a copy of the GNU Lesser General Public\n"
-" * License along with this library. If not, see <http://www.gnu.org/licenses/>.\n"
-" *\n"
-" * Author: Benjamin Segovia <benjamin.segovia@intel.com>\n"
-" */\n"
+"#define INLINE_OVERLOADABLE __attribute__((overloadable,always_inline))\n"
+"#define OVERLOADABLE __attribute__((overloadable))\n"
"\n"
"/////////////////////////////////////////////////////////////////////////////\n"
"// OpenCL basic types\n"
"DECL_SELECT4(float4, float, int4, 0x80000000)\n"
"#undef DECL_SELECT4\n"
"\n"
-"__attribute__((overloadable,always_inline))\n"
-"inline float2 mad(float2 a, float2 b, float2 c) {\n"
+"INLINE_OVERLOADABLE inline float2 mad(float2 a, float2 b, float2 c) {\n"
" return (float2)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y));\n"
"}\n"
-"__attribute__((overloadable,always_inline))\n"
-"inline float3 mad(float3 a, float3 b, float3 c) {\n"
+"INLINE_OVERLOADABLE inline float3 mad(float3 a, float3 b, float3 c) {\n"
" return (float3)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y), mad(a.z,b.z,c.z));\n"
"}\n"
-"__attribute__((overloadable,always_inline))\n"
-"inline float4 mad(float4 a, float4 b, float4 c) {\n"
+"INLINE_OVERLOADABLE inline float4 mad(float4 a, float4 b, float4 c) {\n"
" return (float4)(mad(a.x,b.x,c.x), mad(a.y,b.y,c.y),\n"
" mad(a.z,b.z,c.z), mad(a.w,b.w,c.w));\n"
"}\n"
"\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"// Extensions to manipulate the register file\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"\n"
+"// Direct addressing register regions\n"
+"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int);\n"
+"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int);\n"
+"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int);\n"
+"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int);\n"
+"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int);\n"
+"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int, int);\n"
+"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int, int, int);\n"
+"OVERLOADABLE int __gen_ocl_region(int offset, int vstride, int width, int hstride, int, int, int, int, int, int, int, int);\n"
+"\n"
+"// Gather from register file\n"
+"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int);\n"
+"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int);\n"
+"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int);\n"
+"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int);\n"
+"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int);\n"
+"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int, int);\n"
+"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int, int, int);\n"
+"OVERLOADABLE int __gen_ocl_rgather(unsigned short index, int, int, int, int, int, int, int, int);\n"
+"\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"// Extension to have uniform condition per hardware thread\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"\n"
+"OVERLOADABLE unsigned short __gen_ocl_any(unsigned short cond);\n"
+"OVERLOADABLE unsigned short __gen_ocl_all(unsigned short cond);\n"
+"\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"// Extension to support OBlock reads / writes\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"\n"
+"OVERLOADABLE int __gen_ocl_obread(const __global void *address);\n"
+"OVERLOADABLE int __gen_ocl_obread(const __constant void *address);\n"
+"OVERLOADABLE int __gen_ocl_obread(const __local void *address);\n"
+"OVERLOADABLE void __gen_ocl_obwrite(const __global void *address, int);\n"
+"OVERLOADABLE void __gen_ocl_obwrite(const __local void *address, int);\n"
+"\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"// Force the compilation to SIMD8 or SIMD16\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"\n"
+"int __gen_ocl_force_simd8(void);\n"
+"int __gen_ocl_force_simd16(void);\n"
+"\n"
+"#define DECL_VOTE(TYPE) \\\n"
+"__attribute__((overloadable,always_inline)) \\\n"
+"TYPE __gen_ocl_any(TYPE cond) { \\\n"
+" return (TYPE) __gen_ocl_any((unsigned short) cond); \\\n"
+"} \\\n"
+"__attribute__((overloadable,always_inline)) \\\n"
+"TYPE __gen_ocl_all(TYPE cond) { \\\n"
+" return (TYPE) __gen_ocl_all((unsigned short) cond); \\\n"
+"}\n"
+"DECL_VOTE(unsigned int)\n"
+"DECL_VOTE(unsigned char)\n"
+"DECL_VOTE(int)\n"
+"DECL_VOTE(char)\n"
+"DECL_VOTE(short)\n"
+"DECL_VOTE(bool)\n"
+"#undef DECL_VOTE\n"
"\n"
"#define NULL ((void*)0)\n"
+"#undef INLINE_OVERLOADABLE\n"
+"\n"
;
}
-__kernel void
-test_copy_buffer(__global float* src, __global float* dst)
-{
- int id = (int)get_global_id(0);
- dst[id] = src[id];
-}
-
+__kernel void\r
+test_copy_buffer(__global float* src, __global float* dst)\r
+{\r
+ int id = (int)get_global_id(0);\r
+ dst[id] = src[id];\r
+}\r
+\r
-__kernel void
-test_copy_buffer_row(__global int *src, __global int *dst, __global int *data)
-{
- int row = data[0];
- int size = data[1];
- int id = (int) get_global_id(0);
- for (; id < size; id += row) dst[id] = src[id];
-}
-
+__kernel void\r
+test_copy_buffer_row(__global int *src, __global int *dst, __global int *data)\r
+{\r
+ int row = data[0];\r
+ int size = data[1];\r
+ int id = (int) get_global_id(0);\r
+ for (; id < size; id += row) dst[id] = src[id];\r
+}\r
+\r
-__kernel void
-test_write_only(__global int *dst)
-{
- int id = (int)get_global_id(0);
- dst[id] = id;
-}
-
+__kernel void\r
+test_write_only(__global int *dst)\r
+{\r
+ int id = (int)get_global_id(0);\r
+ dst[id] = id;\r
+}\r
+\r
link_directories (${LLVM_LIBRARY_DIRS})
ADD_LIBRARY(utests SHARED
- utest_error.c
- utest_helper.cpp
- utest_file_map.cpp
- utest_assert.cpp
- utest.cpp
- app_mandelbrot.cpp
- compiler_write_only.cpp
- compiler_write_only_shorts.cpp
- compiler_write_only_bytes.cpp
- compiler_sub_bytes.cpp
- compiler_sub_shorts.cpp
- compiler_copy_buffer.cpp
- compiler_copy_buffer_row.cpp
- compiler_byte_scatter.cpp
- compiler_short_scatter.cpp
- compiler_uint2_copy.cpp
- compiler_uint3_copy.cpp
- compiler_uint3_unaligned_copy.cpp
- compiler_if_else.cpp
- compiler_unstructured_branch0.cpp
- compiler_unstructured_branch1.cpp
- compiler_unstructured_branch2.cpp
- compiler_unstructured_branch3.cpp
- compiler_lower_return0.cpp
- compiler_lower_return1.cpp
- compiler_lower_return2.cpp
-# unsupported indirect load of kernel arguments
-# compiler_argument_structure_indirect.cpp
- compiler_array.cpp
- compiler_array0.cpp
- compiler_function_argument.cpp
- compiler_function_argument0.cpp
- compiler_function_argument1.cpp
- compiler_argument_structure.cpp)
-# compiler_local_slm.cpp)
+ utest_error.c
+ app_mandelbrot.cpp
+ compiler_argument_structure.cpp
+ compiler_array0.cpp
+ compiler_array.cpp
+ compiler_array1.cpp
+ compiler_array2.cpp
+ compiler_array3.cpp
+ compiler_byte_scatter.cpp
+ compiler_copy_buffer.cpp
+ compiler_copy_buffer_row.cpp
+ compiler_function_argument0.cpp
+ compiler_function_argument1.cpp
+ compiler_function_argument.cpp
+ compiler_gather_register_file.cpp
+ compiler_gather_register_file0.cpp
+ compiler_gather_register_file1.cpp
+ compiler_if_else.cpp
+ compiler_lower_return0.cpp
+ compiler_lower_return1.cpp
+ compiler_lower_return2.cpp
+ compiler_obread.cpp
+ compiler_obwrite.cpp
+ compiler_region.cpp
+ compiler_region0.cpp
+ compiler_region1.cpp
+ compiler_short_scatter.cpp
+ compiler_sub_bytes.cpp
+ compiler_sub_shorts.cpp
+ compiler_uint2_copy.cpp
+ compiler_uint3_copy.cpp
+ compiler_uint3_unaligned_copy.cpp
+ compiler_unstructured_branch0.cpp
+ compiler_unstructured_branch1.cpp
+ compiler_unstructured_branch2.cpp
+ compiler_unstructured_branch3.cpp
+ compiler_vote_all.cpp
+ compiler_vote_any.cpp
+ compiler_write_only_bytes.cpp
+ compiler_write_only.cpp
+ compiler_write_only_shorts.cpp
+ utest_assert.cpp
+ utest.cpp
+ utest_file_map.cpp
+ utest_helper.cpp)
TARGET_LINK_LIBRARIES(utests cl m)
compiler_argument_structure.cpp \
compiler_array0.cpp \
compiler_array.cpp \
+ compiler_array1.cpp \
+ compiler_array2.cpp \
+ compiler_array3.cpp \
compiler_byte_scatter.cpp \
compiler_copy_buffer.cpp \
compiler_copy_buffer_row.cpp \
compiler_function_argument0.cpp \
compiler_function_argument1.cpp \
compiler_function_argument.cpp \
+ compiler_gather_register_file.cpp \
+ compiler_gather_register_file0.cpp \
+ compiler_gather_register_file1.cpp \
compiler_if_else.cpp \
compiler_lower_return0.cpp \
compiler_lower_return1.cpp \
compiler_lower_return2.cpp \
+ compiler_obread.cpp \
+ compiler_obwrite.cpp \
+ compiler_region.cpp \
+ compiler_region0.cpp \
+ compiler_region1.cpp \
compiler_short_scatter.cpp \
compiler_sub_bytes.cpp \
compiler_sub_shorts.cpp \
compiler_unstructured_branch1.cpp \
compiler_unstructured_branch2.cpp \
compiler_unstructured_branch3.cpp \
+ compiler_vote_all.cpp \
+ compiler_vote_any.cpp \
compiler_write_only_bytes.cpp \
compiler_write_only.cpp \
compiler_write_only_shorts.cpp \