specialRegs.insert(reg);
}
});
- kernel->curbeSize = ALIGN(kernel->curbeSize, 32); // 32 == GEN_REG_SIZE
+ if (this->simdWidth == 8)
+ kernel->curbeSize = ALIGN(kernel->curbeSize, 32); // 32 == GEN_REG_SIZE
+ else
+ kernel->curbeSize = ALIGN(kernel->curbeSize, 64); // 64 == 2*GEN_REG_SIZE
// Local IDs always go at the end of the curbe
const size_t localIDSize = sizeof(uint32_t) * this->simdWidth;
const uint32_t subnr = ((offset + GEN_REG_SIZE) % GEN_REG_SIZE) / typeSize;
GBE_ASSERT(data.family == ir::FAMILY_DWORD); // XXX support the rest
if (this->isScalarReg(reg) == true)
- RA.insert(std::make_pair(reg, brw_vec1_reg(GEN_GENERAL_REGISTER_FILE, nr, subnr)));
+ RA.insert(std::make_pair(reg, GenReg::vec1(GEN_GENERAL_REGISTER_FILE, nr, subnr)));
else if (this->simdWidth == 8)
- RA.insert(std::make_pair(reg, brw_vec8_reg(GEN_GENERAL_REGISTER_FILE, nr, subnr)));
+ RA.insert(std::make_pair(reg, GenReg::vec8(GEN_GENERAL_REGISTER_FILE, nr, subnr)));
else if (this->simdWidth == 16)
- RA.insert(std::make_pair(reg, brw_vec16_reg(GEN_GENERAL_REGISTER_FILE, nr, subnr)));
+ RA.insert(std::make_pair(reg, GenReg::vec16(GEN_GENERAL_REGISTER_FILE, nr, subnr)));
}
}
allocatePayloadReg(GBE_CURBE_GROUP_NUM_Z, 0, ocl::numgroup2);
// Group IDs are always allocated by the hardware in r0
- RA.insert(std::make_pair(ocl::groupid0, brw_vec1_reg(GEN_GENERAL_REGISTER_FILE, 0, 1)));
- RA.insert(std::make_pair(ocl::groupid1, brw_vec1_reg(GEN_GENERAL_REGISTER_FILE, 0, 6)));
- RA.insert(std::make_pair(ocl::groupid2, brw_vec1_reg(GEN_GENERAL_REGISTER_FILE, 0, 7)));
+ RA.insert(std::make_pair(ocl::groupid0, GenReg::vec1(GEN_GENERAL_REGISTER_FILE, 0, 1)));
+ RA.insert(std::make_pair(ocl::groupid1, GenReg::vec1(GEN_GENERAL_REGISTER_FILE, 0, 6)));
+ RA.insert(std::make_pair(ocl::groupid2, GenReg::vec1(GEN_GENERAL_REGISTER_FILE, 0, 7)));
// Allocate all input parameters
const uint32_t inputNum = fn.inputNum();
});
// Allocate all used registers. Just crash when we run out-of-registers
- // r0 is always taken by the HW
- uint32_t grfOffset = kernel->getCurbeSize() + GEN_REG_SIZE;
- if (simdWidth >= 16)
- grfOffset = ALIGN(grfOffset, 2 * GEN_REG_SIZE);
- GBE_ASSERT(simdWidth != 32); // a bit more complicated see later
+ // r0 is always taken by the HW. We also always write down local IDs after
+ // the curbe data
+ uint32_t grfOffset = kernel->getCurbeSize() + GEN_REG_SIZE
+ + 3 * sizeof(uint32_t) * this->simdWidth;
+ GBE_ASSERT(simdWidth != 32); // XXX a bit more complicated see later
+ if (simdWidth == 16) grfOffset = ALIGN(grfOffset, 64);
for (auto reg : usedRegs) {
if (fn.isSpecialReg(reg) == true) continue; // already done
if (fn.getInput(reg) != NULL) continue; // already done
const uint32_t subnr = (grfOffset % GEN_REG_SIZE) / typeSize;
GBE_ASSERT(family == FAMILY_DWORD); // XXX Do the rest
GBE_ASSERT(grfOffset + simdWidth*typeSize < GEN_GRF_SIZE);
- RA.insert(std::make_pair(reg, brw_vec16_reg(GEN_GENERAL_REGISTER_FILE, nr, subnr)));
+ RA.insert(std::make_pair(reg, GenReg::vec16(GEN_GENERAL_REGISTER_FILE, nr, subnr)));
grfOffset += simdWidth * typeSize;
}
}
// Default type is FLOAT
GBE_ASSERT(type == TYPE_U32 || type == TYPE_S32 || type == TYPE_FLOAT);
if (type == TYPE_U32) {
- dst = retype(dst, GEN_TYPE_UD);
- src0 = retype(src0, GEN_TYPE_UD);
- src1 = retype(src1, GEN_TYPE_UD);
+ dst = GenReg::retype(dst, GEN_TYPE_UD);
+ src0 = GenReg::retype(src0, GEN_TYPE_UD);
+ src1 = GenReg::retype(src1, GEN_TYPE_UD);
} else if (type == TYPE_S32) {
- dst = retype(dst, GEN_TYPE_D);
- src0 = retype(src0, GEN_TYPE_D);
- src1 = retype(src1, GEN_TYPE_D);
+ dst = GenReg::retype(dst, GEN_TYPE_D);
+ src0 = GenReg::retype(src0, GEN_TYPE_D);
+ src1 = GenReg::retype(src1, GEN_TYPE_D);
}
// Output the binary instruction
switch (opcode) {
case OP_ADD: p->ADD(dst, src0, src1); break;
- case OP_MUL: p->MUL(dst, src0, src1); break;
+ case OP_MUL:
+ {
+ p->MUL(dst, src0, src1);
+#if 0
+ if (type == TYPE_FLOAT) p->MUL(dst, src0, src1);
+ else {
+
+ }
+#endif
+ break;
+ }
default: NOT_IMPLEMENTED;
}
}
const GenReg dst = reg(insn.getDst(0));
switch (type) {
- case TYPE_U32: p->MOV(retype(dst, GEN_TYPE_UD), brw_imm_ud(imm.data.u32)); break;
- case TYPE_S32: p->MOV(retype(dst, GEN_TYPE_D), brw_imm_d(imm.data.s32)); break;
- case TYPE_FLOAT: p->MOV(dst, brw_imm_f(imm.data.f32)); break;
+ case TYPE_U32: p->MOV(GenReg::retype(dst, GEN_TYPE_UD), GenReg::immud(imm.data.u32)); break;
+ case TYPE_S32: p->MOV(GenReg::retype(dst, GEN_TYPE_D), GenReg::immd(imm.data.s32)); break;
+ case TYPE_FLOAT: p->MOV(dst, GenReg::immf(imm.data.f32)); break;
default: NOT_SUPPORTED;
}
}
void GenContext::emitLoadInstruction(const ir::LoadInstruction &insn) {
+ using namespace ir;
+ GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL); // only global memory loads are handled
+ GBE_ASSERT(insn.getValueNum() == 1); // exactly one value per load instruction
+ GBE_ASSERT(insn.isAligned() == true); // unaligned loads are rejected
+ GBE_ASSERT(this->simdWidth <= 16);
+ const GenReg address = reg(insn.getAddress());
+ const GenReg value = reg(insn.getValue(0));
+ // XXX Unlike the store path, nothing is copied here: the address register is
+ // handed to the read as-is and the value register is given as its destination
+ if (this->simdWidth == 8 || this->simdWidth == 16)
+ p->UNTYPED_READ(value, address, 0, 1); // bti 0, one element
+ else
+ NOT_IMPLEMENTED;
}
void GenContext::emitStoreInstruction(const ir::StoreInstruction &insn) {
- const GenReg dst = reg(insn.getDst(0));
-
+ using namespace ir;
+ GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL); // only global memory stores are handled
+ GBE_ASSERT(insn.getValueNum() == 1); // exactly one value per store instruction
+ GBE_ASSERT(insn.isAligned() == true); // unaligned stores are rejected
+ GBE_ASSERT(this->simdWidth <= 16);
+ const GenReg address = reg(insn.getAddress());
+ const GenReg value = reg(insn.getValue(0));
+ // XXX remove that later. Now we just copy everything to GRFs to make it
+ // contiguous
+ if (this->simdWidth == 8) {
+ p->MOV(GenReg::vec8grf(112, 0), GenReg::retype(address, GEN_TYPE_F)); // r112 <- addresses
+ p->MOV(GenReg::vec8grf(113, 0), GenReg::retype(value, GEN_TYPE_F)); // r113 <- values, right after the addresses
+ p->UNTYPED_WRITE(GenReg::vec8grf(112, 0), 0, 1); // message starts at r112; bti 0, one element
+ } else if (this->simdWidth == 16) {
+ p->MOV(GenReg::vec16grf(112, 0), GenReg::retype(address, GEN_TYPE_F)); // addresses start at r112
+ p->MOV(GenReg::vec16grf(114, 0), GenReg::retype(value, GEN_TYPE_F)); // values start at r114; presumably the 16 addresses fill r112-r113 -- TODO confirm
+ p->UNTYPED_WRITE(GenReg::vec16grf(112, 0), 0, 1); // message starts at r112; bti 0, one element
+ } else
+ NOT_IMPLEMENTED;
}
void GenContext::emitFenceInstruction(const ir::FenceInstruction &insn) {}
void GenContext::emitLabelInstruction(const ir::LabelInstruction &insn) {}
this->allocateRegister();
this->emitInstructionStream();
p->EOT(127);
- genKernel->insnNum = p->nr_insn;
+ genKernel->insnNum = p->insnNum;
genKernel->insns = GBE_NEW_ARRAY(GenInstruction, genKernel->insnNum);
std::memcpy(genKernel->insns, p->store, genKernel->insnNum * sizeof(GenInstruction));
if (OCL_OUTPUT_ASM) {
#define GEN_CHANNEL_Z 2
#define GEN_CHANNEL_W 3
-#define GEN6_COMPRESSION_1Q 0
-#define GEN6_COMPRESSION_2Q 1
-#define GEN6_COMPRESSION_3Q 2
-#define GEN6_COMPRESSION_4Q 3
-#define GEN6_COMPRESSION_1H 0
-#define GEN6_COMPRESSION_2H 2
+#define GEN_COMPRESSION_Q1 0
+#define GEN_COMPRESSION_Q2 1
+#define GEN_COMPRESSION_Q3 2
+#define GEN_COMPRESSION_Q4 3
+#define GEN_COMPRESSION_H1 0
+#define GEN_COMPRESSION_H2 2
#define GEN_CONDITIONAL_NONE 0
#define GEN_CONDITIONAL_Z 1
#define GEN_DEPENDENCY_NOTCHECKED 2
#define GEN_DEPENDENCY_DISABLE 3
-#define GEN_EXECUTE_1 0
-#define GEN_EXECUTE_2 1
-#define GEN_EXECUTE_4 2
-#define GEN_EXECUTE_8 3
-#define GEN_EXECUTE_16 4
-#define GEN_EXECUTE_32 5
-
#define GEN_HORIZONTAL_STRIDE_0 0
#define GEN_HORIZONTAL_STRIDE_1 1
#define GEN_HORIZONTAL_STRIDE_2 2
/*! Gen SFID */
enum GenMessageTarget {
GEN_SFID_NULL = 0,
- GEN_SFID_MATH = 1, /* Only valid on Gen4-5 */
+ GEN_SFID_MATH = 1,
GEN_SFID_SAMPLER = 2,
GEN_SFID_MESSAGE_GATEWAY = 3,
GEN_SFID_DATAPORT_READ = 4,
GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4,
GEN6_SFID_DATAPORT_RENDER_CACHE = 5,
GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,
- GEN7_SFID_DATAPORT_DATA_CACHE = 10,
+ GEN_SFID_DATAPORT_DATA_CACHE = 10, /* target used for the untyped read / write messages */
};
#define GEN_PREDICATE_NONE 0
#define GEN_VERTICAL_STRIDE_256 9
#define GEN_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+/* Execution width */
#define GEN_WIDTH_1 0
#define GEN_WIDTH_2 1
#define GEN_WIDTH_4 2
#define GEN_WIDTH_8 3
#define GEN_WIDTH_16 4
+#define GEN_WIDTH_32 5
+
+/* Channels to enable for the untyped reads and writes */
+#define GEN_UNTYPED_RED (1 << 0)
+#define GEN_UNTYPED_GREEN (1 << 1)
+#define GEN_UNTYPED_BLUE (1 << 2)
+#define GEN_UNTYPED_ALPHA (1 << 3)
-/*! Channels to enable for the untyped reads and writes */
-#define GEN7_UNTYPED_RED (1 << 0)
-#define GEN7_UNTYPED_GREEN (1 << 1)
-#define GEN7_UNTYPED_BLUE (1 << 2)
-#define GEN7_UNTYPED_ALPHA (1 << 3)
+/* SIMD mode for untyped reads and writes (stored in the descriptor simd_mode field) */
+#define GEN_UNTYPED_SIMD4x2 0
+#define GEN_UNTYPED_SIMD16 1
+#define GEN_UNTYPED_SIMD8 2
+
+/* Data port message type (stored in the descriptor msg_type field) */
+#define GEN_UNTYPED_READ 5
+#define GEN_UNTYPED_WRITE 13
#define GEN_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define GEN_SAMPLER_RETURN_FORMAT_UINT32 2
#define GEN_SAMPLER_SIMD_MODE_SIMD16 2
#define GEN_SAMPLER_SIMD_MODE_SIMD32_64 3
-#define GEN_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
-#define GEN_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
-#define GEN_DATAPORT_OWORD_BLOCK_2_OWORDS 2
-#define GEN_DATAPORT_OWORD_BLOCK_4_OWORDS 3
-#define GEN_DATAPORT_OWORD_BLOCK_8_OWORDS 4
-
-#define GEN_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
-#define GEN_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
-
-#define GEN_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
-#define GEN_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
-
-/* This one stays the same across generations. */
-#define GEN_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
-/* GEN4 */
-#define GEN_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
-#define GEN_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2
-#define GEN_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
-/* GEN6 */
-#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
-#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
-#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
-#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5
-#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
-
-#define GEN_DATAPORT_READ_TARGET_DATA_CACHE 0
-#define GEN_DATAPORT_READ_TARGET_RENDER_CACHE 1
-#define GEN_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
-
-#define GEN_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
-#define GEN_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
-#define GEN_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
-#define GEN_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
-#define GEN_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
-
-#define GEN_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
-#define GEN_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
-#define GEN_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2
-#define GEN_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
-#define GEN_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
-#define GEN_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
-#define GEN_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
-
-/* GEN6 */
-#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7
-#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8
-#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9
-#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10
-#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11
-#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12
-#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13
-#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14
-
-/* GEN7 */
-#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 10
-
#define GEN_MATH_FUNCTION_INV 1
#define GEN_MATH_FUNCTION_LOG 2
#define GEN_MATH_FUNCTION_EXP 3
* register allocators have to be careful of this to avoid corrupting the "MRF"s
* with actual GRF allocations.
*/
-#define GEN7_MRF_HACK_START 112.
+#define GEN_MRF_HACK_START 112.
/** Number of message register file registers */
#define GEN_MAX_MRF 16
namespace gbe
{
GenEmitter::GenEmitter(uint32_t simdWidth, uint32_t gen) :
- nr_insn(0), simdWidth(simdWidth), gen(gen) {}
+ insnNum(0), gen(gen) // no instruction emitted yet
+ {
+ this->curr.execWidth = simdWidth; // initial execution width is the kernel SIMD width
+ this->curr.quaterControl = GEN_COMPRESSION_Q1; // initial quarter control: Q1
+ this->curr.noMask = 0; // initial noMask state: cleared
+ }
- void GenEmitter::guess_execution_size(GenInstruction *insn, GenReg reg)
+ void GenEmitter::setExecutionWidth(GenInstruction *insn) // writes the header execution_size from curr.execWidth
{
- if (simdWidth == 8)
- insn->header.execution_size = GEN_EXECUTE_8;
- else if (simdWidth == 16)
- insn->header.execution_size = GEN_EXECUTE_16;
+ if (this->curr.execWidth == 8)
+ insn->header.execution_size = GEN_WIDTH_8;
+ else if (this->curr.execWidth == 16)
+ insn->header.execution_size = GEN_WIDTH_16; // any other width hits the assertion below
else
GBE_ASSERT(0);
}
}
}
- void GenEmitter::set_predicate_control(uint32_t pc)
- {
- // p->current->header.predicate_control = pc;
- }
-
- void GenEmitter::set_predicate_inverse(bool predicate_inverse)
- {
- // p->current->header.predicate_inverse = predicate_inverse;
- }
-
- void GenEmitter::set_conditionalmod(uint32_t conditional)
- {
- // p->current->header.destreg__conditionalmod = conditional;
- }
-
- void GenEmitter::set_access_mode(uint32_t access_mode)
- {
- // p->current->header.access_mode = access_mode;
- }
-
- void GenEmitter::set_dest(GenInstruction *insn, GenReg dest)
+ void GenEmitter::setDst(GenInstruction *insn, GenReg dest)
{
if (dest.file != GEN_ARCHITECTURE_REGISTER_FILE)
assert(dest.nr < 128);
}
}
- /* NEW: Set the execution size based on dest.width and
- * insn->compression_control:
- */
- guess_execution_size(insn, dest);
+ this->setExecutionWidth(insn);
}
static const int reg_type_size[8] = { 4, 4, 2, 2, 1, 1, 4 };
}
void
- GenEmitter::set_src0(GenInstruction *insn, GenReg reg)
+ GenEmitter::setSrc0(GenInstruction *insn, GenReg reg)
{
if (reg.type != GEN_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
insn->bits1.da1.src0_reg_file = reg.file;
insn->bits1.da1.src0_reg_type = reg.type;
- insn->bits2.da1.src0_abs = reg.abs;
- insn->bits2.da1.src0_negate = reg.negate;
+ insn->bits2.da1.src0_abs = reg.absolute;
+ insn->bits2.da1.src0_negate = reg.negation;
insn->bits2.da1.src0_address_mode = reg.address_mode;
if (reg.file == GEN_IMMEDIATE_VALUE) {
if (insn->header.access_mode == GEN_ALIGN_1) {
if (reg.width == GEN_WIDTH_1 &&
- insn->header.execution_size == GEN_EXECUTE_1) {
+ insn->header.execution_size == GEN_WIDTH_1) {
insn->bits2.da1.src0_horiz_stride = GEN_HORIZONTAL_STRIDE_0;
insn->bits2.da1.src0_width = GEN_WIDTH_1;
insn->bits2.da1.src0_vert_stride = GEN_VERTICAL_STRIDE_0;
}
- void GenEmitter::set_src1(GenInstruction *insn, GenReg reg)
+ void GenEmitter::setSrc1(GenInstruction *insn, GenReg reg)
{
assert(reg.nr < 128);
insn->bits1.da1.src1_reg_file = reg.file;
insn->bits1.da1.src1_reg_type = reg.type;
- insn->bits3.da1.src1_abs = reg.abs;
- insn->bits3.da1.src1_negate = reg.negate;
+ insn->bits3.da1.src1_abs = reg.absolute;
+ insn->bits3.da1.src1_negate = reg.negation;
/* Only src1 can be immediate in two-argument instructions.
*/
if (insn->header.access_mode == GEN_ALIGN_1) {
if (reg.width == GEN_WIDTH_1 &&
- insn->header.execution_size == GEN_EXECUTE_1) {
+ insn->header.execution_size == GEN_WIDTH_1) {
insn->bits3.da1.src1_horiz_stride = GEN_HORIZONTAL_STRIDE_0;
insn->bits3.da1.src1_width = GEN_WIDTH_1;
insn->bits3.da1.src1_vert_stride = GEN_VERTICAL_STRIDE_0;
bool header_present = false,
bool end_of_thread = false)
{
- p->set_src1(inst, brw_imm_d(0));
+ p->setSrc1(inst, GenReg::immd(0));
inst->bits3.generic_gen5.header_present = header_present;
inst->bits3.generic_gen5.response_length = response_length;
inst->bits3.generic_gen5.msg_length = msg_length;
inst->header.destreg__conditionalmod = sfid;
}
-#if 0
- void
- GenEmitter::set_dp_write_message(GenInstruction *insn,
- uint32_t bti,
- uint32_t msg_control,
- uint32_t msg_type,
- uint32_t msg_length,
- bool header_present,
- uint32_t last_render_target,
- uint32_t response_length,
- uint32_t end_of_thread,
- uint32_t send_commit_msg)
- {
- unsigned sfid;
-
- /* Use the Render Cache for RT writes; otherwise use the Data Cache */
- if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
- sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
- else
- sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
- brw_set_message_descriptor(this, insn, GenMessageTarget(sfid),
- msg_length, response_length,
- header_present, end_of_thread);
-
- insn->bits3.gen7_dp.bti = bti;
- insn->bits3.gen7_dp.msg_control = msg_control;
- insn->bits3.gen7_dp.last_render_target = last_render_target;
- insn->bits3.gen7_dp.msg_type = msg_type;
- }
-#endif
-
void
- GenEmitter::set_dp_untyped_rw(GenInstruction *insn,
- uint32_t bti,
- uint32_t rgba,
- uint32_t simd_mode,
- uint32_t msg_type,
- uint32_t msg_length,
- uint32_t response_length)
- {
- GenMessageTarget sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
- brw_set_message_descriptor(this, insn, sfid, msg_length, response_length);
+ set_dp_untyped_rw(GenEmitter *p, // emitter whose curr.execWidth selects the SIMD mode
+ GenInstruction *insn, // instruction whose message descriptor is filled in
+ uint32_t bti, // copied to the descriptor bti field
+ uint32_t rgba, // copied to the descriptor rgba field
+ uint32_t msg_type, // copied to the descriptor msg_type field
+ uint32_t msg_length, // forwarded to brw_set_message_descriptor
+ uint32_t response_length) // forwarded to brw_set_message_descriptor
+ {
+ GenMessageTarget sfid = GEN_SFID_DATAPORT_DATA_CACHE;
+ brw_set_message_descriptor(p, insn, sfid, msg_length, response_length); // generic descriptor part first
insn->bits3.gen7_untyped_rw.msg_type = msg_type;
insn->bits3.gen7_untyped_rw.bti = bti;
insn->bits3.gen7_untyped_rw.rgba = rgba;
- insn->bits3.gen7_untyped_rw.simd_mode = simd_mode;
+ if (p->curr.execWidth == 8) // the SIMD mode is no longer a parameter: it follows the execution width
+ insn->bits3.gen7_untyped_rw.simd_mode = GEN_UNTYPED_SIMD8;
+ else if (p->curr.execWidth == 16)
+ insn->bits3.gen7_untyped_rw.simd_mode = GEN_UNTYPED_SIMD16;
+ else
+ NOT_SUPPORTED;
}
static const uint32_t untypedRWMask[] = {
- 0,
- GEN7_UNTYPED_RED,
- GEN7_UNTYPED_RED|GEN7_UNTYPED_GREEN,
- GEN7_UNTYPED_RED|GEN7_UNTYPED_GREEN|GEN7_UNTYPED_BLUE,
- GEN7_UNTYPED_RED|GEN7_UNTYPED_GREEN|GEN7_UNTYPED_BLUE|GEN7_UNTYPED_ALPHA
+ GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN|GEN_UNTYPED_RED, // index 0: all four channel bits set
+ GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN, // index 1: every bit but RED
+ GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE, // index 2: RED and GREEN bits clear
+ GEN_UNTYPED_ALPHA, // index 3: only the ALPHA bit set
+ 0 // index 4: no bit set. NOTE(review): a set bit appears to mark a channel that is NOT accessed -- confirm against the data port message spec
};
-#if 0
void
GenEmitter::UNTYPED_READ(GenReg dst, GenReg src, uint32_t bti, uint32_t elemNum)
{
GenInstruction *insn = this->next(GEN_OPCODE_SEND);
assert(elemNum >= 1 || elemNum <= 4);
- this->set_dp_untyped_rw(insn,
- bti,
- untypedRWMask[elemNum],
+ uint32_t msg_length = 0;
+ uint32_t response_length = 0;
+ if (this->curr.execWidth == 8) {
+ msg_length = 1;
+ response_length = elemNum;
+ } else if (this->curr.execWidth == 16) {
+ msg_length = 2;
+ response_length = 2*elemNum;
+ } else
+ NOT_IMPLEMENTED;
+ this->setDst(insn, GenReg::uw16grf(dst.nr, 0));
+ this->setSrc0(insn, GenReg::ud8grf(src.nr, 0));
+ this->setSrc1(insn, GenReg::immud(0));
+ this->setExecutionWidth(insn);
+ set_dp_untyped_rw(this,
+ insn,
+ bti,
+ untypedRWMask[elemNum],
+ GEN_UNTYPED_READ,
+ msg_length,
+ response_length);
}
void
- GenEmitter::UNTYPED_WRITE(GenReg src, uint32_t bti, uint32_t elemNum)
+ GenEmitter::UNTYPED_WRITE(GenReg msg, uint32_t bti, uint32_t elemNum)
{
GenInstruction *insn = this->next(GEN_OPCODE_SEND);
assert(elemNum >= 1 || elemNum <= 4);
-
- }
-#endif
-
- void
- GenEmitter::set_sampler_message(GenInstruction *insn,
- uint32_t bti,
- uint32_t sampler,
- uint32_t msg_type,
- uint32_t response_length,
- uint32_t msg_length,
- uint32_t header_present,
- uint32_t simd_mode,
- uint32_t return_format)
- {
- brw_set_message_descriptor(this, insn, GEN_SFID_SAMPLER, msg_length,
- response_length, header_present, false);
+ uint32_t msg_length = 0;
+ uint32_t response_length = 0;
+ if (this->curr.execWidth == 8)
+ msg_length = 1+elemNum;
+ else if (this->curr.execWidth == 16)
+ msg_length = 2*(1+elemNum);
+ else
+ NOT_IMPLEMENTED;
+ this->setDst(insn, GenReg::retype(GenReg::null(), GEN_TYPE_UW));
+ this->setSrc0(insn, GenReg::ud8grf(msg.nr, 0));
+ this->setSrc1(insn, GenReg::immud(0));
+ this->setExecutionWidth(insn);
+ set_dp_untyped_rw(this,
+ insn,
+ bti,
+ untypedRWMask[elemNum],
+ GEN_UNTYPED_WRITE,
+ msg_length,
+ response_length);
+ }
+
+ void set_sampler_message(GenEmitter *p,
+ GenInstruction *insn,
+ uint32_t bti,
+ uint32_t sampler,
+ uint32_t msg_type,
+ uint32_t response_length,
+ uint32_t msg_length,
+ uint32_t header_present,
+ uint32_t simd_mode,
+ uint32_t return_format)
+ {
+ brw_set_message_descriptor(p, insn, GEN_SFID_SAMPLER, msg_length,
+ response_length, header_present);
insn->bits3.sampler_gen7.bti = bti;
insn->bits3.sampler_gen7.sampler = sampler;
insn->bits3.sampler_gen7.msg_type = msg_type;
GenInstruction *GenEmitter::next(uint32_t opcode)
{
GenInstruction *insn;
- insn = &this->store[this->nr_insn++];
+ insn = &this->store[this->insnNum++];
std::memset(insn, 0, sizeof(GenInstruction));
insn->header.opcode = opcode;
return insn;
GenReg src)
{
GenInstruction *insn = p->next(opcode);
- p->set_dest(insn, dest);
- p->set_src0(insn, src);
+ p->setDst(insn, dest);
+ p->setSrc0(insn, src);
return insn;
}
GenReg src1)
{
GenInstruction *insn = p->next(opcode);
- p->set_dest(insn, dest);
- p->set_src0(insn, src0);
- p->set_src1(insn, src1);
+ p->setDst(insn, dest);
+ p->setSrc0(insn, src0);
+ p->setSrc1(insn, src1);
return insn;
}
insn->bits1.da3src.dest_reg_nr = dest.nr;
insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
- p->guess_execution_size(insn, dest);
+ p->setExecutionWidth(insn);
assert(src0.file == GEN_GENERAL_REGISTER_FILE);
assert(src0.address_mode == GEN_ADDRESS_DIRECT);
insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
insn->bits2.da3src.src0_reg_nr = src0.nr;
- insn->bits1.da3src.src0_abs = src0.abs;
- insn->bits1.da3src.src0_negate = src0.negate;
+ insn->bits1.da3src.src0_abs = src0.absolute;
+ insn->bits1.da3src.src0_negate = src0.negation;
insn->bits2.da3src.src0_rep_ctrl = src0.vstride == GEN_VERTICAL_STRIDE_0;
assert(src1.file == GEN_GENERAL_REGISTER_FILE);
insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
insn->bits2.da3src.src1_rep_ctrl = src1.vstride == GEN_VERTICAL_STRIDE_0;
insn->bits3.da3src.src1_reg_nr = src1.nr;
- insn->bits1.da3src.src1_abs = src1.abs;
- insn->bits1.da3src.src1_negate = src1.negate;
+ insn->bits1.da3src.src1_abs = src1.absolute;
+ insn->bits1.da3src.src1_negate = src1.negation;
assert(src2.file == GEN_GENERAL_REGISTER_FILE);
assert(src2.address_mode == GEN_ADDRESS_DIRECT);
insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
insn->bits3.da3src.src2_rep_ctrl = src2.vstride == GEN_VERTICAL_STRIDE_0;
insn->bits3.da3src.src2_reg_nr = src2.nr;
- insn->bits1.da3src.src2_abs = src2.abs;
- insn->bits1.da3src.src2_negate = src2.negate;
+ insn->bits1.da3src.src2_abs = src2.absolute;
+ insn->bits1.da3src.src2_negate = src2.negation;
return insn;
}
return brw_alu3(this, GEN_OPCODE_##OP, dest, src0, src1, src2); \
}
- /* Rounding operations (other than RNDD) require two instructions - the first
- * stores a rounded value (possibly the wrong way) in the dest register, but
- * also sets a per-channel "increment bit" in the flag register. A predicated
- * add of 1.0 fixes dest to contain the desired result.
- *
- * Sandybridge and later appear to round correctly without an ADD.
- */
-#define ROUND(OP) \
- void GenEmitter::OP(GenReg dest, GenReg src) \
- { \
- GenInstruction *rnd; \
- rnd = this->next(GEN_OPCODE_##OP); \
- this->set_dest(rnd, dest); \
- this->set_src0(rnd, src); \
- }
-
ALU1(MOV)
+ ALU1(RNDZ)
+ ALU1(RNDE)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
- ALU2(DP4)
- ALU2(DPH)
- ALU2(DP3)
- ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
- ROUND(RNDZ)
- ROUND(RNDE)
-
GenInstruction *GenEmitter::ADD(GenReg dest, GenReg src0, GenReg src1)
{
/* 6.2.2: add */
void GenEmitter::NOP(void)
{
GenInstruction *insn = this->next(GEN_OPCODE_NOP);
- this->set_dest(insn, retype(brw_vec4_grf(0,0), GEN_TYPE_UD));
- this->set_src0(insn, retype(brw_vec4_grf(0,0), GEN_TYPE_UD));
- this->set_src1(insn, brw_imm_ud(0x0));
+ this->setDst(insn, GenReg::retype(GenReg::vec4grf(0,0), GEN_TYPE_UD)); // the NOP still gets all three operands encoded
+ this->setSrc0(insn, GenReg::retype(GenReg::vec4grf(0,0), GEN_TYPE_UD)); // same register as the destination, typed UD
+ this->setSrc1(insn, GenReg::immud(0x0)); // immediate zero
}
GenInstruction *GenEmitter::JMPI(GenReg dest, GenReg src0, GenReg src1)
GenInstruction *insn = this->next(GEN_OPCODE_CMP);
insn->header.destreg__conditionalmod = conditional;
- this->set_dest(insn, dest);
- this->set_src0(insn, src0);
- this->set_src1(insn, src1);
-
- guess_execution_size(insn, src0);
+ this->setDst(insn, dest);
+ this->setSrc0(insn, src0);
+ this->setSrc1(insn, src1);
+ this->setExecutionWidth(insn);
#if 0
/* Make it so that future instructions will use the computed flag
void GenEmitter::WAIT(void)
{
GenInstruction *insn = this->next(GEN_OPCODE_WAIT);
- GenReg src = brw_notification_1_reg();
+ GenReg src = GenReg::notification1(); // used as both destination and source 0 below
- this->set_dest(insn, src);
- this->set_src0(insn, src);
- this->set_src1(insn, brw_null_reg());
+ this->setDst(insn, src); // NOTE(review): setDst appears to set the execution size from curr.execWidth; it is overwritten below
+ this->setSrc0(insn, src);
+ this->setSrc1(insn, GenReg::null());
insn->header.execution_size = 0; /* must */
insn->header.predicate_control = 0;
insn->header.compression_control = 0;
}
- /* Extended math function, float[8] */
- void GenEmitter::math(GenReg dest,
+ void GenEmitter::MATH(GenReg dest,
uint32_t function,
uint32_t saturate,
uint32_t msg_reg_nr,
assert(dest.file == GEN_GENERAL_REGISTER_FILE);
assert(src.file == GEN_GENERAL_REGISTER_FILE);
assert(dest.hstride == GEN_HORIZONTAL_STRIDE_1);
- if (this->gen == 6)
- assert(src.hstride == GEN_HORIZONTAL_STRIDE_1);
-
- /* Source modifiers are ignored for extended math instructions on Gen6. */
- if (this->gen == 6) {
- assert(!src.negate);
- assert(!src.abs);
- }
if (function == GEN_MATH_FUNCTION_INT_DIV_QUOTIENT ||
function == GEN_MATH_FUNCTION_INT_DIV_REMAINDER ||
insn->header.destreg__conditionalmod = function;
insn->header.saturate = saturate;
- this->set_dest(insn, dest);
- this->set_src0(insn, src);
- this->set_src1(insn, brw_null_reg());
+ this->setDst(insn, dest);
+ this->setSrc0(insn, src);
+ this->setSrc1(insn, GenReg::null());
}
- /* Extended math function, float[8] */
- void GenEmitter::math2(GenReg dest, uint32_t function, GenReg src0, GenReg src1)
+
+ void GenEmitter::MATH2(GenReg dest, uint32_t function, GenReg src0, GenReg src1)
{
GenInstruction *insn = this->next(GEN_OPCODE_MATH);
assert(src1.file == GEN_GENERAL_REGISTER_FILE);
assert(dest.hstride == GEN_HORIZONTAL_STRIDE_1);
- if (this->gen == 6) {
- assert(src0.hstride == GEN_HORIZONTAL_STRIDE_1);
- assert(src1.hstride == GEN_HORIZONTAL_STRIDE_1);
- }
if (function == GEN_MATH_FUNCTION_INT_DIV_QUOTIENT ||
function == GEN_MATH_FUNCTION_INT_DIV_REMAINDER ||
assert(src1.type == GEN_TYPE_F);
}
- /* Source modifiers are ignored for extended math instructions on Gen6. */
- if (this->gen == 6) {
- assert(!src0.negate);
- assert(!src0.abs);
- assert(!src1.negate);
- assert(!src1.abs);
- }
-
- /* Math is the same ISA format as other opcodes, except that CondModifier
- * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
- */
insn->header.destreg__conditionalmod = function;
-
- this->set_dest(insn, dest);
- this->set_src0(insn, src0);
- this->set_src1(insn, src1);
+ this->setDst(insn, dest);
+ this->setSrc0(insn, src0);
+ this->setSrc1(insn, src1);
}
- /* Extended math function, float[16] */
- void GenEmitter::math_16(GenReg dest,
- uint32_t function,
- uint32_t saturate,
- uint32_t msg_reg_nr,
- GenReg src,
- uint32_t precision)
+ void GenEmitter::MATH16(GenReg dest,
+ uint32_t function,
+ uint32_t saturate,
+ uint32_t msg_reg_nr,
+ GenReg src,
+ uint32_t precision)
{
GenInstruction *insn;
assert(!src.negate);
assert(!src.abs);
- this->set_dest(insn, dest);
- this->set_src0(insn, src);
- this->set_src1(insn, brw_null_reg());
+ this->setDst(insn, dest);
+ this->setSrc0(insn, src);
+ this->setSrc1(insn, GenReg::null());
}
/**
* Note: the msg_type plus msg_length values determine exactly what kind
* of sampling operation is performed. See volume 4, page 161 of docs.
*/
- void GenEmitter::SAMPLE(
- GenReg dest,
- uint32_t msg_reg_nr,
- GenReg src0,
- uint32_t bti,
- uint32_t sampler,
- uint32_t writemask,
- uint32_t msg_type,
- uint32_t response_length,
- uint32_t msg_length,
- uint32_t header_present,
- uint32_t simd_mode,
- uint32_t return_format)
+ void GenEmitter::SAMPLE(GenReg dest,
+ uint32_t msg_reg_nr,
+ GenReg src0,
+ uint32_t bti,
+ uint32_t sampler,
+ uint32_t writemask,
+ uint32_t msg_type,
+ uint32_t response_length,
+ uint32_t msg_length,
+ uint32_t header_present,
+ uint32_t simd_mode,
+ uint32_t return_format)
{
if (writemask == 0) return;
insn = this->next(GEN_OPCODE_SEND);
insn->header.predicate_control = 0; /* XXX */
- this->set_dest(insn, dest);
- this->set_src0(insn, src0);
- this->set_sampler_message(insn,
- bti,
- sampler,
- msg_type,
- response_length,
- msg_length,
- header_present,
- simd_mode,
- return_format);
+ this->setDst(insn, dest);
+ this->setSrc0(insn, src0);
+ set_sampler_message(this,
+ insn,
+ bti,
+ sampler,
+ msg_type,
+ response_length,
+ msg_length,
+ header_present,
+ simd_mode,
+ return_format);
}
}
{
GenInstruction *insn = NULL;
- insn = this->MOV(brw_vec8_grf(msg_nr,0), brw_vec8_grf(0,0));
+ insn = this->MOV(GenReg::vec8grf(msg_nr,0), GenReg::vec8grf(0,0));
insn->header.mask_control = GEN_MASK_DISABLE;
insn = this->next(GEN_OPCODE_SEND);
- this->set_dest(insn, brw_null_reg());
- this->set_src0(insn, brw_vec8_grf(msg_nr,0));
- this->set_src1(insn, brw_imm_ud(0));
- insn->header.execution_size = GEN_EXECUTE_8;
+ this->setDst(insn, GenReg::null());
+ this->setSrc0(insn, GenReg::vec8grf(msg_nr,0));
+ this->setSrc1(insn, GenReg::immud(0));
+ insn->header.execution_size = GEN_WIDTH_8;
insn->bits3.spawner_gen5.resource = GEN_DO_NOT_DEREFERENCE_URB;
insn->bits3.spawner_gen5.msg_length = 1;
insn->bits3.spawner_gen5.end_of_thread = 1;
namespace gbe
{
- /* These aren't hardware structs, just something useful for us to pass around:
- *
- * Align1 operation has a lot of control over input ranges. Used in
- * WM programs to implement shaders decomposed into "channel serial"
- * or "structure of array" form:
+ /*! Size in bytes of one element of the given Gen register type. Types that
+ * are not listed yield 0 */
+ INLINE int typeSize(uint32_t type) {
+ // 32 bit types
+ if (type == GEN_TYPE_UD || type == GEN_TYPE_D || type == GEN_TYPE_F)
+ return 4;
+ // 16 bit types
+ if (type == GEN_TYPE_HF || type == GEN_TYPE_UW || type == GEN_TYPE_W)
+ return 2;
+ // 8 bit types
+ if (type == GEN_TYPE_UB || type == GEN_TYPE_B)
+ return 1;
+ return 0;
+ }
+
+ /*! Map 1, 2, 4, 8, 16, 32 to the codes 1 to 6; 0 and any other value map to 0.
+ * Kept as plain comparisons so that a constant argument folds at compile time:
*/
+ INLINE uint32_t cvt(uint32_t val) {
+ if (val == 1) return 1;
+ if (val == 2) return 2;
+ if (val == 4) return 3;
+ if (val == 8) return 4;
+ if (val == 16) return 5;
+ if (val == 32) return 6;
+ return 0; // 0 itself and every value not listed above
+ }
+
+ /*! These are not hardware structs, just something useful to pass around */
struct GenReg
{
- uint32_t type:4;
- uint32_t file:2;
- uint32_t nr:8;
- uint32_t subnr:5; /* :1 in align16 */
- uint32_t negate:1; /* source only */
- uint32_t abs:1; /* source only */
- uint32_t vstride:4; /* source only */
- uint32_t width:3; /* src only, align1 only */
- uint32_t hstride:2; /* align1 only */
- uint32_t address_mode:1; /* relative addressing, hopefully! */
- uint32_t pad0:1;
-
- union {
- struct {
- uint32_t swizzle:8; /* src only, align16 only */
- uint32_t writemask:4; /* dest only, align16 only */
- int indirect_offset:10; /* relative addressing offset */
- uint32_t pad1:10; /* two dwords total */
- } bits;
-
- float f;
- int d;
- uint32_t ud;
- } dw1;
+ /*! Empty constructor: the body is empty, so no field is set here */
+ INLINE GenReg(void) {}
+    /*! General constructor
+     *  \param file one of the GEN_x_REGISTER_FILE values
+     *  \param nr register number/index
+     *  \param subnr register sub number, given in elements of 'type' and
+     *         stored as a byte offset
+     *  \param type one of GEN_TYPE_x
+     *  \param vstride one of GEN_VERTICAL_STRIDE_x
+     *  \param width one of GEN_WIDTH_x
+     *  \param hstride one of GEN_HORIZONTAL_STRIDE_x
+     *  \param swizzle one of GEN_SWIZZLE_x
+     *  \param writemask WRITEMASK_X/Y/Z/W bitfield
+     */
+    INLINE GenReg(uint32_t file,
+                  uint32_t nr,
+                  uint32_t subnr,
+                  uint32_t type,
+                  uint32_t vstride,
+                  uint32_t width,
+                  uint32_t hstride,
+                  uint32_t swizzle,
+                  uint32_t writemask)
+    {
+      // Only GRF and ARF register numbers are range-checked
+      if (file == GEN_GENERAL_REGISTER_FILE) {
+        assert(nr < GEN_MAX_GRF);
+      } else if (file == GEN_ARCHITECTURE_REGISTER_FILE) {
+        assert(nr <= GEN_ARF_IP);
+      }
+
+      // Location and type of the register
+      this->file = file;
+      this->nr = nr;
+      this->type = type;
+      this->subnr = subnr * typeSize(type); // element index -> byte offset
+
+      // Region description
+      this->vstride = vstride;
+      this->width = width;
+      this->hstride = hstride;
+
+      // Source modifiers are off and addressing is direct by default
+      this->negation = 0;
+      this->absolute = 0;
+      this->address_mode = GEN_ADDRESS_DIRECT;
+      this->pad0 = 0;
+
+      // Second dword
+      this->dw1.bits.swizzle = swizzle;
+      this->dw1.bits.writemask = writemask;
+      this->dw1.bits.indirect_offset = 0;
+      this->dw1.bits.pad1 = 0;
+    }
+
+    /*! float[16] register. It is built with the same 8-wide region
+     *  (vertical stride 8, width 8, horizontal stride 1) as vec8()
+     */
+    static INLINE GenReg vec16(uint32_t file, uint32_t nr, uint32_t subnr) {
+      return GenReg(file, nr, subnr, GEN_TYPE_F,
+                    GEN_VERTICAL_STRIDE_8, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_1,
+                    GEN_SWIZZLE_XYZW, WRITEMASK_XYZW);
+    }
+
+    /*! float[8] register (vertical stride 8, width 8, horizontal stride 1) */
+    static INLINE GenReg vec8(uint32_t file, uint32_t nr, uint32_t subnr) {
+      return GenReg(file, nr, subnr, GEN_TYPE_F,
+                    GEN_VERTICAL_STRIDE_8, GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_1,
+                    GEN_SWIZZLE_XYZW, WRITEMASK_XYZW);
+    }
+
+    /*! float[4] register (vertical stride 4, width 4, horizontal stride 1) */
+    static INLINE GenReg vec4(uint32_t file, uint32_t nr, uint32_t subnr) {
+      return GenReg(file, nr, subnr, GEN_TYPE_F,
+                    GEN_VERTICAL_STRIDE_4, GEN_WIDTH_4, GEN_HORIZONTAL_STRIDE_1,
+                    GEN_SWIZZLE_XYZW, WRITEMASK_XYZW);
+    }
+
+    /*! float[2] register (vertical stride 2, width 2, horizontal stride 1).
+     *  Swizzle is XYXY and only X and Y are enabled in the write mask
+     */
+    static INLINE GenReg vec2(uint32_t file, uint32_t nr, uint32_t subnr) {
+      return GenReg(file, nr, subnr, GEN_TYPE_F,
+                    GEN_VERTICAL_STRIDE_2, GEN_WIDTH_2, GEN_HORIZONTAL_STRIDE_1,
+                    GEN_SWIZZLE_XYXY, WRITEMASK_XY);
+    }
+
+    /*! float[1] register: both strides are 0 and the width is 1. Swizzle is
+     *  XXXX and only X is enabled in the write mask
+     */
+    static INLINE GenReg vec1(uint32_t file, uint32_t nr, uint32_t subnr) {
+      return GenReg(file, nr, subnr, GEN_TYPE_F,
+                    GEN_VERTICAL_STRIDE_0, GEN_WIDTH_1, GEN_HORIZONTAL_STRIDE_0,
+                    GEN_SWIZZLE_XXXX, WRITEMASK_X);
+    }
+
+    /*! Copy of reg with its type replaced. The byte offset stored in subnr is
+     *  left as it is
+     */
+    static INLINE GenReg retype(GenReg reg, uint32_t type) {
+      GenReg retyped = reg;
+      retyped.type = type;
+      return retyped;
+    }
+
+    /*! Copy of reg moved forward by delta elements of its current type */
+    static INLINE GenReg suboffset(GenReg reg, uint32_t delta) {
+      // subnr is a byte offset: scale the element count by the element size
+      const uint32_t byteDelta = delta * typeSize(reg.type);
+      reg.subnr += byteDelta;
+      return reg;
+    }
+
+    /*! Copy of reg moved forward by delta whole registers */
+    static INLINE GenReg offset(GenReg reg, uint32_t delta) {
+      GenReg moved = reg;
+      moved.nr += delta;
+      return moved;
+    }
+
+    /*! Copy of reg moved forward by a number of bytes. Register number and
+     *  sub-register offset are both recomputed from the linear byte address
+     */
+    static INLINE GenReg byte_offset(GenReg reg, uint32_t bytes) {
+      const uint32_t linear = reg.nr * GEN_REG_SIZE + reg.subnr + bytes;
+      GenReg moved = reg;
+      moved.nr = linear / GEN_REG_SIZE;
+      moved.subnr = linear % GEN_REG_SIZE;
+      return moved;
+    }
+
+    /*! unsigned word[16] register. subnr is counted in UW elements */
+    static INLINE GenReg uw16(uint32_t file, uint32_t nr, uint32_t subnr) {
+      const GenReg base = retype(vec16(file, nr, 0), GEN_TYPE_UW);
+      return suboffset(base, subnr);
+    }
+
+    /*! unsigned word[8] register. subnr is counted in UW elements */
+    static INLINE GenReg uw8(uint32_t file, uint32_t nr, uint32_t subnr) {
+      const GenReg base = retype(vec8(file, nr, 0), GEN_TYPE_UW);
+      return suboffset(base, subnr);
+    }
+
+    /*! unsigned word[1] register. subnr is counted in UW elements */
+    static INLINE GenReg uw1(uint32_t file, uint32_t nr, uint32_t subnr) {
+      const GenReg base = retype(vec1(file, nr, 0), GEN_TYPE_UW);
+      return suboffset(base, subnr);
+    }
+
+    /*! unsigned dword[16] register: vec16() retyped to UD */
+    static INLINE GenReg ud16(uint32_t file, uint32_t nr, uint32_t subnr) {
+      const GenReg asFloat = vec16(file, nr, subnr);
+      return retype(asFloat, GEN_TYPE_UD);
+    }
+
+    /*! unsigned dword[8] register: vec8() retyped to UD */
+    static INLINE GenReg ud8(uint32_t file, uint32_t nr, uint32_t subnr) {
+      const GenReg asFloat = vec8(file, nr, subnr);
+      return retype(asFloat, GEN_TYPE_UD);
+    }
+
+    /*! unsigned dword[1] register: vec1() retyped to UD */
+    static INLINE GenReg ud1(uint32_t file, uint32_t nr, uint32_t subnr) {
+      const GenReg asFloat = vec1(file, nr, subnr);
+      return retype(asFloat, GEN_TYPE_UD);
+    }
+
+    /*! Immediate of the given type. The value itself (dw1) still has to be
+     *  written by the caller: it shares its storage with the swizzle and
+     *  write mask bits, which are zeroed here
+     */
+    static INLINE GenReg imm(uint32_t type) {
+      const uint32_t nr = 0, subnr = 0;
+      const uint32_t swizzle = 0, writemask = 0;
+      return GenReg(GEN_IMMEDIATE_VALUE, nr, subnr, type,
+                    GEN_VERTICAL_STRIDE_0, GEN_WIDTH_1, GEN_HORIZONTAL_STRIDE_0,
+                    swizzle, writemask);
+    }
+
+    /*! float immediate */
+    static INLINE GenReg immf(float f) {
+      GenReg value = imm(GEN_TYPE_F);
+      value.dw1.f = f;
+      return value;
+    }
+
+    /*! integer immediate */
+    static INLINE GenReg immd(int d) {
+      GenReg value = imm(GEN_TYPE_D);
+      value.dw1.d = d;
+      return value;
+    }
+
+    /*! unsigned integer immediate */
+    static INLINE GenReg immud(uint32_t ud) {
+      GenReg value = imm(GEN_TYPE_UD);
+      value.dw1.ud = ud;
+      return value;
+    }
+
+    /*! unsigned short immediate. The 16-bit value is replicated in both halves
+     *  of the 32-bit immediate
+     */
+    static INLINE GenReg immuw(uint16_t uw) {
+      GenReg immediate = imm(GEN_TYPE_UW);
+      // Widen first: shifting the uint16_t directly would shift a promoted
+      // (signed) int into its sign bit for values >= 0x8000
+      const uint32_t bits = uw;
+      immediate.dw1.ud = bits | (bits << 16);
+      return immediate;
+    }
+
+    /*! short immediate. The 16-bit pattern of w is replicated in both halves
+     *  of the 32-bit immediate, as immuw() does for unsigned values
+     */
+    static INLINE GenReg immw(short w) {
+      GenReg immediate = imm(GEN_TYPE_W);
+      // Keep only the 16 bits of w before replicating them. OR-ing the
+      // sign-extended int would force the upper half to 0xffff for every
+      // negative value, and it would left-shift a negative int
+      const uint32_t bits = static_cast<uint16_t>(w);
+      immediate.dw1.ud = bits | (bits << 16);
+      return immediate;
+    }
+
+    /*! vector of eight signed half-byte values, packed in v */
+    static INLINE GenReg immv(uint32_t v) {
+      GenReg packed = imm(GEN_TYPE_V);
+      packed.dw1.ud = v;
+      // Region of the packed vector: eight consecutive elements
+      packed.vstride = GEN_VERTICAL_STRIDE_0;
+      packed.width = GEN_WIDTH_8;
+      packed.hstride = GEN_HORIZONTAL_STRIDE_1;
+      return packed;
+    }
+
+    /*! vector of four 8-bit float values, packed in v */
+    static INLINE GenReg immvf(uint32_t v) {
+      GenReg packed = imm(GEN_TYPE_VF);
+      packed.dw1.ud = v;
+      // Region of the packed vector: four consecutive elements
+      packed.vstride = GEN_VERTICAL_STRIDE_0;
+      packed.width = GEN_WIDTH_4;
+      packed.hstride = GEN_HORIZONTAL_STRIDE_1;
+      return packed;
+    }
+
+    /*! Same as immvf() with the four values given one by one: v0 goes in the
+     *  lowest byte and v3 in the highest one. The values are not masked
+     */
+    static INLINE GenReg immvf4(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) {
+      const uint32_t packed = (v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24);
+      return immvf(packed);
+    }
+
+    /*! Byte address of reg in the register file (nr * GEN_REG_SIZE + subnr)
+     *  as an unsigned short immediate
+     */
+    static INLINE GenReg address(GenReg reg) {
+      const uint16_t byteAddress = reg.nr * GEN_REG_SIZE + reg.subnr;
+      return immuw(byteAddress);
+    }
+
+    /*! Shortcuts for registers that live in the general register file */
+    static INLINE GenReg vec1grf(uint32_t nr, uint32_t subnr) { return vec1(GEN_GENERAL_REGISTER_FILE, nr, subnr); }
+    static INLINE GenReg vec2grf(uint32_t nr, uint32_t subnr) { return vec2(GEN_GENERAL_REGISTER_FILE, nr, subnr); }
+    static INLINE GenReg vec4grf(uint32_t nr, uint32_t subnr) { return vec4(GEN_GENERAL_REGISTER_FILE, nr, subnr); }
+    static INLINE GenReg vec8grf(uint32_t nr, uint32_t subnr) { return vec8(GEN_GENERAL_REGISTER_FILE, nr, subnr); }
+    static INLINE GenReg vec16grf(uint32_t nr, uint32_t subnr) { return vec16(GEN_GENERAL_REGISTER_FILE, nr, subnr); }
+    static INLINE GenReg ud8grf(uint32_t nr, uint32_t subnr) { return ud8(GEN_GENERAL_REGISTER_FILE, nr, subnr); }
+    static INLINE GenReg ud16grf(uint32_t nr, uint32_t subnr) { return ud16(GEN_GENERAL_REGISTER_FILE, nr, subnr); }
+    static INLINE GenReg uw8grf(uint32_t nr, uint32_t subnr) { return uw8(GEN_GENERAL_REGISTER_FILE, nr, subnr); }
+    static INLINE GenReg uw16grf(uint32_t nr, uint32_t subnr) { return uw16(GEN_GENERAL_REGISTER_FILE, nr, subnr); }
+
+    /*! Null register (usually used for setting condition codes) */
+    static INLINE GenReg null(void) {
+      const uint32_t subnr = 0;
+      return vec8(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_NULL, subnr);
+    }
+
+    /*! Address register, as one unsigned word at element subnr */
+    static INLINE GenReg address(uint32_t subnr) {
+      const uint32_t nr = GEN_ARF_ADDRESS;
+      return uw1(GEN_ARCHITECTURE_REGISTER_FILE, nr, subnr);
+    }
+
+    /*! Accumulator register, as a float[8] */
+    static INLINE GenReg acc(void) {
+      const uint32_t subnr = 0;
+      return vec8(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_ACCUMULATOR, subnr);
+    }
+
+    /*! Element 1 of the notification count register, as one unsigned dword */
+    static INLINE GenReg notification1(void) {
+      const uint32_t subnr = 1;
+      return GenReg(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_NOTIFICATION_COUNT, subnr,
+                    GEN_TYPE_UD,
+                    GEN_VERTICAL_STRIDE_0, GEN_WIDTH_1, GEN_HORIZONTAL_STRIDE_0,
+                    GEN_SWIZZLE_XXXX, WRITEMASK_X);
+    }
+
+    /*! Flag register, as one unsigned word at element 0 */
+    static INLINE GenReg flag(void) {
+      const uint32_t subnr = 0;
+      return uw1(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_FLAG, subnr);
+    }
+
+    /*! Mask register, as one unsigned word at element subnr */
+    static INLINE GenReg mask(uint32_t subnr) {
+      const uint32_t nr = GEN_ARF_MASK;
+      return uw1(GEN_ARCHITECTURE_REGISTER_FILE, nr, subnr);
+    }
+
+    /*! Copy of reg with a new region. The three arguments are plain numbers
+     *  (not GEN_x encodings): they are converted with cvt(), and the width is
+     *  stored as cvt(width) - 1
+     */
+    static INLINE GenReg stride(GenReg reg, uint32_t vstride, uint32_t width, uint32_t hstride) {
+      const uint32_t vstrideCode = cvt(vstride);
+      const uint32_t widthCode = cvt(width) - 1;
+      const uint32_t hstrideCode = cvt(hstride);
+      reg.vstride = vstrideCode;
+      reg.width = widthCode;
+      reg.hstride = hstrideCode;
+      return reg;
+    }
+
+    /*! Copy of reg with a 16-wide region (strides and width given to stride()) */
+    static INLINE GenReg vec16(GenReg reg) {
+      return stride(reg, 16, 16, 1);
+    }
+    /*! Copy of reg with an 8-wide region */
+    static INLINE GenReg vec8(GenReg reg) {
+      return stride(reg, 8, 8, 1);
+    }
+    /*! Copy of reg with a 4-wide region */
+    static INLINE GenReg vec4(GenReg reg) {
+      return stride(reg, 4, 4, 1);
+    }
+    /*! Copy of reg with a 2-wide region */
+    static INLINE GenReg vec2(GenReg reg) {
+      return stride(reg, 2, 2, 1);
+    }
+    /*! Copy of reg with a scalar region (both strides are 0) */
+    static INLINE GenReg vec1(GenReg reg) {
+      return stride(reg, 0, 1, 0);
+    }
+
+    /*! Scalar region on element elt of reg, elements having the type of reg */
+    static INLINE GenReg getElement(GenReg reg, uint32_t elt) {
+      const GenReg moved = suboffset(reg, elt);
+      return vec1(moved);
+    }
+
+    /*! Scalar region on element elt of reg once retyped to UD */
+    static INLINE GenReg getElementUD(GenReg reg, uint32_t elt) {
+      const GenReg moved = suboffset(retype(reg, GEN_TYPE_UD), elt);
+      return vec1(moved);
+    }
+
+    /*! Scalar region on element elt of reg once retyped to D */
+    static INLINE GenReg getElementD(GenReg reg, uint32_t elt) {
+      const GenReg moved = suboffset(retype(reg, GEN_TYPE_D), elt);
+      return vec1(moved);
+    }
+
+    /*! Compose a swizzle with the one already set on reg: each of x, y, z, w
+     *  selects a component of the current swizzle. Not valid on immediates
+     */
+    static INLINE GenReg swizzle(GenReg reg, uint32_t x, uint32_t y, uint32_t z, uint32_t w) {
+      assert(reg.file != GEN_IMMEDIATE_VALUE);
+      // Every component is looked up in the swizzle as it was on entry
+      const uint32_t current = reg.dw1.bits.swizzle;
+      const uint32_t newX = GEN_GET_SWZ(current, x);
+      const uint32_t newY = GEN_GET_SWZ(current, y);
+      const uint32_t newZ = GEN_GET_SWZ(current, z);
+      const uint32_t newW = GEN_GET_SWZ(current, w);
+      reg.dw1.bits.swizzle = GEN_SWIZZLE4(newX, newY, newZ, newW);
+      return reg;
+    }
+
+    /*! swizzle() with the same selection x used for all four components */
+    static INLINE GenReg swizzle1(GenReg reg, uint32_t x) {
+      const uint32_t all = x;
+      return swizzle(reg, all, all, all, all);
+    }
+
+    /*! Restrict the write mask of reg: only the channels that are also set in
+     *  mask are kept. Not valid on immediates
+     */
+    static INLINE GenReg writemask(GenReg reg, uint32_t mask) {
+      assert(reg.file != GEN_IMMEDIATE_VALUE);
+      const uint32_t kept = reg.dw1.bits.writemask & mask;
+      reg.dw1.bits.writemask = kept;
+      return reg;
+    }
+
+    /*! Replace the write mask of reg by mask. Not valid on immediates */
+    static INLINE GenReg set_writemask(GenReg reg, uint32_t mask) {
+      assert(reg.file != GEN_IMMEDIATE_VALUE);
+      GenReg masked = reg;
+      masked.dw1.bits.writemask = mask;
+      return masked;
+    }
+
+    /*! Copy of reg with its negation bit toggled: negating twice gives back
+     *  the initial register
+     */
+    static INLINE GenReg negate(GenReg reg) {
+      reg.negation = reg.negation ? 0 : 1;
+      return reg;
+    }
+
+    /*! Copy of reg with the absolute value modifier set. Any negation that was
+     *  set on reg is dropped
+     */
+    static INLINE GenReg abs(GenReg reg) {
+      GenReg positive = reg;
+      positive.negation = 0;
+      positive.absolute = 1;
+      return positive;
+    }
+
+    /*! float[4] GRF operand using register-indirect addressing. subnr is
+     *  stored as given (it is not scaled by the type size) and offset goes in
+     *  the indirect offset field
+     */
+    static INLINE GenReg vec4_indirect(uint32_t subnr, int offset) {
+      GenReg indirect = vec4grf(0, 0);
+      indirect.address_mode = GEN_ADDRESS_REGISTER_INDIRECT_REGISTER;
+      indirect.subnr = subnr;
+      indirect.dw1.bits.indirect_offset = offset;
+      return indirect;
+    }
+
+    /*! float[1] GRF operand using register-indirect addressing. subnr is
+     *  stored as given (it is not scaled by the type size) and offset goes in
+     *  the indirect offset field
+     */
+    static INLINE GenReg vec1_indirect(uint32_t subnr, int offset) {
+      GenReg indirect = vec1grf(0, 0);
+      indirect.address_mode = GEN_ADDRESS_REGISTER_INDIRECT_REGISTER;
+      indirect.subnr = subnr;
+      indirect.dw1.bits.indirect_offset = offset;
+      return indirect;
+    }
+
+    /*! True when both operands are in the same register file and have the
+     *  same register number. Sub-register, type and region are not compared
+     */
+    static INLINE bool same_reg(GenReg r1, GenReg r2) {
+      if (r1.file != r2.file)
+        return false;
+      return r1.nr == r2.nr;
+    }
+
+ /*! Register description packed in two dwords: the bit fields below fill the
+ * first one, dw1 is the second one
+ */
+ uint32_t type:4; /* one of GEN_TYPE_x */
+ uint32_t file:2; /* register file, or GEN_IMMEDIATE_VALUE for immediates */
+ uint32_t nr:8; /* register number */
+ uint32_t subnr:5; /* byte offset in the register. :1 in align16 */
+ uint32_t negation:1; /* source only */
+ uint32_t absolute:1; /* source only */
+ uint32_t vstride:4; /* source only */
+ uint32_t width:3; /* src only, align1 only */
+ uint32_t hstride:2; /* align1 only */
+ uint32_t address_mode:1; /* relative addressing, hopefully! */
+ uint32_t pad0:1;
+
+ /*! Second dword: either the addressing / channel bits of a register or the
+ * value of an immediate (f, d or ud according to its type)
+ */
+ union {
+ struct {
+ uint32_t swizzle:8; /* src only, align16 only */
+ uint32_t writemask:4; /* dest only, align16 only */
+ int32_t indirect_offset:10; /* relative addressing offset */
+ uint32_t pad1:10; /* two dwords total */
+ } bits;
+ float f;
+ int32_t d;
+ uint32_t ud;
+ } dw1;
+ };
+
+ /*! The state for each instruction. GenEmitter keeps one current state and a
+ * fixed-size array of them
+ */
+ struct GenInstructionState
+ {
+ uint32_t execWidth:6; /* presumably the execution width (SIMD lanes) -- TODO confirm */
+ uint32_t quaterControl:2; /* NOTE(review): looks like "quarter control" misspelled -- confirm before renaming */
+ uint32_t noMask:1; /* presumably set to ignore the execution mask -- TODO confirm */
};
/*! Helper structure to emit Gen instructions */
struct GenEmitter
{
+ /*! simdWidth is the default width for the instructions */
GenEmitter(uint32_t simdWidth, uint32_t gen);
- GenInstruction store[8192];
- uint32_t nr_insn;
- uint32_t simdWidth;
+ /*! TODO use a vector */
+ enum { MAX_INSN_NUM = 8192 };
+ /*! Size of the stack (should be large enough) */
+ enum { MAX_STATE_NUM = 16 };
+ /*! TODO Update that with a vector */
+ GenInstruction store[MAX_INSN_NUM];
+ /*! Number of instructions currently pushed */
+ uint32_t insnNum;
+ /*! Current instruction state to use */
+ GenInstructionState curr;
+ /*! State used to encode the instructions */
+ GenInstructionState stack[MAX_STATE_NUM];
+ /*! Gen generation to encode */
uint32_t gen;
- INLINE GenInstruction *current_insn(void) { return &this->store[this->nr_insn]; }
-
- void guess_execution_size(GenInstruction *insn, GenReg reg);
- void set_mask_control(uint32_t value);
- void set_saturate(uint32_t value);
- void set_access_mode(uint32_t access_mode);
- void set_predicate_control_flag_value(uint32_t value);
- void set_predicate_control(uint32_t pc);
- void set_predicate_inverse(bool predicate_inverse);
- void set_conditionalmod(uint32_t conditional);
- void set_acc_write_control(uint32_t value);
-
- void init_compile(struct context *, void *mem_ctx);
- const uint32_t *get_program(uint32_t *sz);
-
- GenInstruction *next(uint32_t opcode);
- void set_dest(GenInstruction *insn, GenReg dest);
- void set_src0(GenInstruction *insn, GenReg reg);
+ ////////////////////////////////////////////////////////////////////////
+ // Encoding functions
+ ////////////////////////////////////////////////////////////////////////
#define ALU1(OP) GenInstruction *OP(GenReg dest, GenReg src0);
#define ALU2(OP) GenInstruction *OP(GenReg dest, GenReg src0, GenReg src1);
#define ALU3(OP) GenInstruction *OP(GenReg dest, GenReg src0, GenReg src1, GenReg src2);
-#define ROUND(OP) void OP(GenReg dest, GenReg src0);
-
ALU1(MOV)
+ ALU1(RNDZ)
+ ALU1(RNDE)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
- ALU2(DP4)
- ALU2(DPH)
- ALU2(DP3)
- ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
-
- ROUND(RNDZ)
- ROUND(RNDE)
-
#undef ALU1
#undef ALU2
#undef ALU3
-#undef ROUND
+ /*! Compare instructions */
+ void CMP(GenReg dst, uint32_t conditional, GenReg src0, GenReg src1);
+
+ /*! EOT is used to finish GPGPU threads */
void EOT(uint32_t msg_nr);
+
+ /*! No-op */
void NOP(void);
+
+ /*! Wait instruction (used for the barrier) */
void WAIT(void);
- void UNTYPED_READ(GenReg src, GenReg dst, uint32_t bti, uint32_t elemNum);
- void UNTYPED_WRITE(GenReg src, uint32_t bti, uint32_t elemNum);
- /* Helpers for SEND instruction */
- void set_sampler_message(GenInstruction *insn,
- uint32_t bti,
- uint32_t sampler,
- uint32_t msg_type,
- uint32_t response_length,
- uint32_t msg_length,
- uint32_t header_present,
- uint32_t simd_mode,
- uint32_t return_format);
-
- void set_dp_untyped_rw(GenInstruction *insn,
- uint32_t bti,
- uint32_t rgba,
- uint32_t simd_mode,
- uint32_t msg_type,
- uint32_t msg_length,
- uint32_t response_length);
+ /*! Untyped read (up to 4 channels) */
+ void UNTYPED_READ(GenReg dst, GenReg src, uint32_t bti, uint32_t elemNum);
+ /*! Untyped write (up to 4 channels) */
+ void UNTYPED_WRITE(GenReg src, uint32_t bti, uint32_t elemNum);
+
+ /*! Send instruction for the sampler */
void SAMPLE(GenReg dest,
uint32_t msg_reg_nr,
GenReg src0,
uint32_t simd_mode,
uint32_t return_format);
- void math_16(GenReg dest,
- uint32_t function,
- uint32_t saturate,
- uint32_t msg_reg_nr,
- GenReg src,
- uint32_t precision);
+ /*! Extended math function, float[16] */
+ void MATH16(GenReg dest,
+ uint32_t function,
+ uint32_t saturate,
+ uint32_t msg_reg_nr,
+ GenReg src,
+ uint32_t precision);
- void math(GenReg dest,
+ /*! Extended math function, float[8] */
+ void MATH(GenReg dest,
uint32_t function,
uint32_t saturate,
uint32_t msg_reg_nr,
uint32_t data_type,
uint32_t precision);
- void math2(GenReg dest, uint32_t function, GenReg src0, GenReg src1);
+ /*! Extended math function, float[8] */
+ void MATH2(GenReg dest, uint32_t function, GenReg src0, GenReg src1);
- void CMP(GenReg dest, uint32_t conditional, GenReg src0, GenReg src1);
- void copy4(GenReg dst, GenReg src, uint32_t count);
- void copy8(GenReg dst, GenReg src, uint32_t count);
- void math_invert(GenReg dst, GenReg src);
- void set_src1(GenInstruction *insn, GenReg reg);
- void set_uip_jip(void);
+ ////////////////////////////////////////////////////////////////////////
+ // Helper functions to encode
+ ////////////////////////////////////////////////////////////////////////
+ void setExecutionWidth(GenInstruction *insn);
+ void setDst(GenInstruction *insn, GenReg dest);
+ void setSrc0(GenInstruction *insn, GenReg reg);
+ void setSrc1(GenInstruction *insn, GenReg reg);
+ GenInstruction *next(uint32_t opcode);
};
- static INLINE int type_sz(uint32_t type)
- {
- switch(type) {
- case GEN_TYPE_UD:
- case GEN_TYPE_D:
- case GEN_TYPE_F:
- return 4;
- case GEN_TYPE_HF:
- case GEN_TYPE_UW:
- case GEN_TYPE_W:
- return 2;
- case GEN_TYPE_UB:
- case GEN_TYPE_B:
- return 1;
- default:
- return 0;
- }
- }
-
- static INLINE bool brw_is_single_value_swizzle(int swiz)
- {
+ /*! True when swiz replicates one single component, i.e. when it is one of
+ * GEN_SWIZZLE_XXXX, _YYYY, _ZZZZ or _WWWW
+ */
+ INLINE bool brw_is_single_value_swizzle(int swiz) {
return (swiz == GEN_SWIZZLE_XXXX ||
swiz == GEN_SWIZZLE_YYYY ||
swiz == GEN_SWIZZLE_ZZZZ ||
swiz == GEN_SWIZZLE_WWWW);
}
- /**
- * Construct a GenReg.
- * \param file one of the GEN_x_REGISTER_FILE values
- * \param nr register number/index
- * \param subnr register sub number
- * \param type one of GEN_TYPE_x
- * \param vstride one of GEN_VERTICAL_STRIDE_x
- * \param width one of GEN_WIDTH_x
- * \param hstride one of GEN_HORIZONTAL_STRIDE_x
- * \param swizzle one of GEN_SWIZZLE_x
- * \param writemask WRITEMASK_X/Y/Z/W bitfield
- */
- static INLINE GenReg makeGenReg(uint32_t file,
- uint32_t nr,
- uint32_t subnr,
- uint32_t type,
- uint32_t vstride,
- uint32_t width,
- uint32_t hstride,
- uint32_t swizzle,
- uint32_t writemask)
- {
- GenReg reg;
- if (file == GEN_GENERAL_REGISTER_FILE)
- assert(nr < GEN_MAX_GRF);
- else if (file == GEN_ARCHITECTURE_REGISTER_FILE)
- assert(nr <= GEN_ARF_IP);
-
- reg.type = type;
- reg.file = file;
- reg.nr = nr;
- reg.subnr = subnr * type_sz(type);
- reg.negate = 0;
- reg.abs = 0;
- reg.vstride = vstride;
- reg.width = width;
- reg.hstride = hstride;
- reg.address_mode = GEN_ADDRESS_DIRECT;
- reg.pad0 = 0;
-
- /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
- * set swizzle and writemask to W, as the lower bits of subnr will
- * be lost when converted to align16. This is probably too much to
- * keep track of as you'd want it adjusted by suboffset(), etc.
- * Perhaps fix up when converting to align16?
- */
- reg.dw1.bits.swizzle = swizzle;
- reg.dw1.bits.writemask = writemask;
- reg.dw1.bits.indirect_offset = 0;
- reg.dw1.bits.pad1 = 0;
- return reg;
- }
-
- /** Construct float[16] register */
- static INLINE GenReg brw_vec16_reg(uint32_t file, uint32_t nr, uint32_t subnr)
- {
- return makeGenReg(file,
- nr,
- subnr,
- GEN_TYPE_F,
- GEN_VERTICAL_STRIDE_8,
- GEN_WIDTH_8,
- GEN_HORIZONTAL_STRIDE_1,
- GEN_SWIZZLE_XYZW,
- WRITEMASK_XYZW);
- }
-
- /** Construct float[8] register */
- static INLINE GenReg brw_vec8_reg(uint32_t file, uint32_t nr, uint32_t subnr)
- {
- return makeGenReg(file,
- nr,
- subnr,
- GEN_TYPE_F,
- GEN_VERTICAL_STRIDE_8,
- GEN_WIDTH_8,
- GEN_HORIZONTAL_STRIDE_1,
- GEN_SWIZZLE_XYZW,
- WRITEMASK_XYZW);
- }
-
- /** Construct float[4] register */
- static INLINE GenReg brw_vec4_reg(uint32_t file, uint32_t nr, uint32_t subnr)
- {
- return makeGenReg(file,
- nr,
- subnr,
- GEN_TYPE_F,
- GEN_VERTICAL_STRIDE_4,
- GEN_WIDTH_4,
- GEN_HORIZONTAL_STRIDE_1,
- GEN_SWIZZLE_XYZW,
- WRITEMASK_XYZW);
- }
-
- /** Construct float[2] register */
- static INLINE GenReg brw_vec2_reg(uint32_t file, uint32_t nr, uint32_t subnr)
- {
- return makeGenReg(file,
- nr,
- subnr,
- GEN_TYPE_F,
- GEN_VERTICAL_STRIDE_2,
- GEN_WIDTH_2,
- GEN_HORIZONTAL_STRIDE_1,
- GEN_SWIZZLE_XYXY,
- WRITEMASK_XY);
- }
-
- /** Construct float[1] register */
- static INLINE GenReg brw_vec1_reg(uint32_t file, uint32_t nr, uint32_t subnr)
- {
- return makeGenReg(file,
- nr,
- subnr,
- GEN_TYPE_F,
- GEN_VERTICAL_STRIDE_0,
- GEN_WIDTH_1,
- GEN_HORIZONTAL_STRIDE_0,
- GEN_SWIZZLE_XXXX,
- WRITEMASK_X);
- }
-
- static INLINE GenReg retype(GenReg reg, uint32_t type)
- {
- reg.type = type;
- return reg;
- }
-
- static INLINE GenReg sechalf(GenReg reg)
- {
- if (reg.vstride)
- reg.nr++;
- return reg;
- }
-
- static INLINE GenReg suboffset(GenReg reg, uint32_t delta)
- {
- reg.subnr += delta * type_sz(reg.type);
- return reg;
- }
-
- static INLINE GenReg offset(GenReg reg, uint32_t delta)
- {
- reg.nr += delta;
- return reg;
- }
-
- static INLINE GenReg byte_offset(GenReg reg, uint32_t bytes)
- {
- uint32_t newoffset = reg.nr * GEN_REG_SIZE + reg.subnr + bytes;
- reg.nr = newoffset / GEN_REG_SIZE;
- reg.subnr = newoffset % GEN_REG_SIZE;
- return reg;
- }
-
-
- /** Construct unsigned word[16] register */
- static INLINE GenReg brw_uw16_reg(uint32_t file, uint32_t nr, uint32_t subnr)
- {
- return suboffset(retype(brw_vec16_reg(file, nr, 0), GEN_TYPE_UW), subnr);
- }
-
- /** Construct unsigned word[8] register */
- static INLINE GenReg brw_uw8_reg(uint32_t file, uint32_t nr, uint32_t subnr)
- {
- return suboffset(retype(brw_vec8_reg(file, nr, 0), GEN_TYPE_UW), subnr);
- }
-
- /** Construct unsigned word[1] register */
- static INLINE GenReg brw_uw1_reg(uint32_t file, uint32_t nr, uint32_t subnr)
- {
- return suboffset(retype(brw_vec1_reg(file, nr, 0), GEN_TYPE_UW), subnr);
- }
-
- static INLINE GenReg brw_imm_reg(uint32_t type)
- {
- return makeGenReg(GEN_IMMEDIATE_VALUE,
- 0,
- 0,
- type,
- GEN_VERTICAL_STRIDE_0,
- GEN_WIDTH_1,
- GEN_HORIZONTAL_STRIDE_0,
- 0,
- 0);
- }
-
- /** Construct float immediate register */
- static INLINE GenReg brw_imm_f(float f)
- {
- GenReg imm = brw_imm_reg(GEN_TYPE_F);
- imm.dw1.f = f;
- return imm;
- }
-
- /** Construct integer immediate register */
- static INLINE GenReg brw_imm_d(int d)
- {
- GenReg imm = brw_imm_reg(GEN_TYPE_D);
- imm.dw1.d = d;
- return imm;
- }
-
- /** Construct uint immediate register */
- static INLINE GenReg brw_imm_ud(uint32_t ud)
- {
- GenReg imm = brw_imm_reg(GEN_TYPE_UD);
- imm.dw1.ud = ud;
- return imm;
- }
-
- /** Construct ushort immediate register */
- static INLINE GenReg brw_imm_uw(uint16_t uw)
- {
- GenReg imm = brw_imm_reg(GEN_TYPE_UW);
- imm.dw1.ud = uw | (uw << 16);
- return imm;
- }
-
- /** Construct short immediate register */
- static INLINE GenReg brw_imm_w(short w)
- {
- GenReg imm = brw_imm_reg(GEN_TYPE_W);
- imm.dw1.d = w | (w << 16);
- return imm;
- }
-
- /* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
- * numbers alias with _V and _VF below:
- */
-
- /** Construct vector of eight signed half-byte values */
- static INLINE GenReg brw_imm_v(uint32_t v)
- {
- GenReg imm = brw_imm_reg(GEN_TYPE_V);
- imm.vstride = GEN_VERTICAL_STRIDE_0;
- imm.width = GEN_WIDTH_8;
- imm.hstride = GEN_HORIZONTAL_STRIDE_1;
- imm.dw1.ud = v;
- return imm;
- }
-
- /** Construct vector of four 8-bit float values */
- static INLINE GenReg brw_imm_vf(uint32_t v)
- {
- GenReg imm = brw_imm_reg(GEN_TYPE_VF);
- imm.vstride = GEN_VERTICAL_STRIDE_0;
- imm.width = GEN_WIDTH_4;
- imm.hstride = GEN_HORIZONTAL_STRIDE_1;
- imm.dw1.ud = v;
- return imm;
- }
-
- static INLINE GenReg brw_imm_vf4(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3)
- {
- GenReg imm = brw_imm_reg(GEN_TYPE_VF);
- imm.vstride = GEN_VERTICAL_STRIDE_0;
- imm.width = GEN_WIDTH_4;
- imm.hstride = GEN_HORIZONTAL_STRIDE_1;
- imm.dw1.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
- return imm;
- }
-
- static INLINE GenReg brw_address(GenReg reg)
- {
- return brw_imm_uw(reg.nr * GEN_REG_SIZE + reg.subnr);
- }
-
- /** Construct float[1] general-purpose register */
- static INLINE GenReg brw_vec1_grf(uint32_t nr, uint32_t subnr)
- {
- return brw_vec1_reg(GEN_GENERAL_REGISTER_FILE, nr, subnr);
- }
-
- /** Construct float[2] general-purpose register */
- static INLINE GenReg brw_vec2_grf(uint32_t nr, uint32_t subnr)
- {
- return brw_vec2_reg(GEN_GENERAL_REGISTER_FILE, nr, subnr);
- }
-
- /** Construct float[4] general-purpose register */
- static INLINE GenReg brw_vec4_grf(uint32_t nr, uint32_t subnr)
- {
- return brw_vec4_reg(GEN_GENERAL_REGISTER_FILE, nr, subnr);
- }
-
- /** Construct float[8] general-purpose register */
- static INLINE GenReg brw_vec8_grf(uint32_t nr, uint32_t subnr)
- {
- return brw_vec8_reg(GEN_GENERAL_REGISTER_FILE, nr, subnr);
- }
-
- static INLINE GenReg brw_uw8_grf(uint32_t nr, uint32_t subnr)
- {
- return brw_uw8_reg(GEN_GENERAL_REGISTER_FILE, nr, subnr);
- }
-
- static INLINE GenReg brw_uw16_grf(uint32_t nr, uint32_t subnr)
- {
- return brw_uw16_reg(GEN_GENERAL_REGISTER_FILE, nr, subnr);
- }
-
- /** Construct null register (usually used for setting condition codes) */
- static INLINE GenReg brw_null_reg(void)
- {
- return brw_vec8_reg(GEN_ARCHITECTURE_REGISTER_FILE,
- GEN_ARF_NULL,
- 0);
- }
-
- static INLINE GenReg brw_address_reg(uint32_t subnr)
- {
- return brw_uw1_reg(GEN_ARCHITECTURE_REGISTER_FILE,
- GEN_ARF_ADDRESS,
- subnr);
- }
-
- /* If/else instructions break in align16 mode if writemask & swizzle
- * aren't xyzw. This goes against the convention for other scalar
- * regs:
- */
- static INLINE GenReg brw_ip_reg(void)
- {
- return makeGenReg(GEN_ARCHITECTURE_REGISTER_FILE,
- GEN_ARF_IP,
- 0,
- GEN_TYPE_UD,
- GEN_VERTICAL_STRIDE_4, /* ? */
- GEN_WIDTH_1,
- GEN_HORIZONTAL_STRIDE_0,
- GEN_SWIZZLE_XYZW, /* NOTE! */
- WRITEMASK_XYZW); /* NOTE! */
- }
-
- static INLINE GenReg brw_acc_reg(void)
- {
- return brw_vec8_reg(GEN_ARCHITECTURE_REGISTER_FILE,
- GEN_ARF_ACCUMULATOR,
- 0);
- }
-
- static INLINE GenReg brw_notification_1_reg(void)
- {
-
- return makeGenReg(GEN_ARCHITECTURE_REGISTER_FILE,
- GEN_ARF_NOTIFICATION_COUNT,
- 1,
- GEN_TYPE_UD,
- GEN_VERTICAL_STRIDE_0,
- GEN_WIDTH_1,
- GEN_HORIZONTAL_STRIDE_0,
- GEN_SWIZZLE_XXXX,
- WRITEMASK_X);
- }
-
-
- static INLINE GenReg brw_flag_reg(void)
- {
- return brw_uw1_reg(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_FLAG, 0);
- }
-
- static INLINE GenReg brw_mask_reg(uint32_t subnr)
- {
- return brw_uw1_reg(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_MASK, subnr);
- }
-
- /* This is almost always called with a numeric constant argument, so
- * make things easy to evaluate at compile time:
- */
- static INLINE uint32_t cvt(uint32_t val)
- {
- switch (val) {
- case 0: return 0;
- case 1: return 1;
- case 2: return 2;
- case 4: return 3;
- case 8: return 4;
- case 16: return 5;
- case 32: return 6;
- }
- return 0;
- }
-
- static INLINE GenReg stride(GenReg reg,
- uint32_t vstride,
- uint32_t width,
- uint32_t hstride)
- {
- reg.vstride = cvt(vstride);
- reg.width = cvt(width) - 1;
- reg.hstride = cvt(hstride);
- return reg;
- }
-
-
- static INLINE GenReg vec16(GenReg reg)
- {
- return stride(reg, 16,16,1);
- }
-
- static INLINE GenReg vec8(GenReg reg)
- {
- return stride(reg, 8,8,1);
- }
-
- static INLINE GenReg vec4(GenReg reg)
- {
- return stride(reg, 4,4,1);
- }
-
- static INLINE GenReg vec2(GenReg reg)
- {
- return stride(reg, 2,2,1);
- }
-
- static INLINE GenReg vec1(GenReg reg)
- {
- return stride(reg, 0,1,0);
- }
-
- static INLINE GenReg get_element(GenReg reg, uint32_t elt)
- {
- return vec1(suboffset(reg, elt));
- }
-
- static INLINE GenReg get_element_ud(GenReg reg, uint32_t elt)
- {
- return vec1(suboffset(retype(reg, GEN_TYPE_UD), elt));
- }
-
- static INLINE GenReg get_element_d(GenReg reg, uint32_t elt)
- {
- return vec1(suboffset(retype(reg, GEN_TYPE_D), elt));
- }
-
- static INLINE GenReg brw_swizzle(GenReg reg, uint32_t x, uint32_t y, uint32_t z, uint32_t w)
- {
- assert(reg.file != GEN_IMMEDIATE_VALUE);
- reg.dw1.bits.swizzle = GEN_SWIZZLE4(GEN_GET_SWZ(reg.dw1.bits.swizzle, x),
- GEN_GET_SWZ(reg.dw1.bits.swizzle, y),
- GEN_GET_SWZ(reg.dw1.bits.swizzle, z),
- GEN_GET_SWZ(reg.dw1.bits.swizzle, w));
- return reg;
- }
-
-
- static INLINE GenReg brw_swizzle1(GenReg reg, uint32_t x)
- {
- return brw_swizzle(reg, x, x, x, x);
- }
-
- static INLINE GenReg brw_writemask(GenReg reg, uint32_t mask)
- {
- assert(reg.file != GEN_IMMEDIATE_VALUE);
- reg.dw1.bits.writemask &= mask;
- return reg;
- }
-
- static INLINE GenReg brw_set_writemask(GenReg reg, uint32_t mask)
- {
- assert(reg.file != GEN_IMMEDIATE_VALUE);
- reg.dw1.bits.writemask = mask;
- return reg;
- }
-
- static INLINE GenReg negate(GenReg reg)
- {
- reg.negate ^= 1;
- return reg;
- }
-
- static INLINE GenReg brw_abs(GenReg reg)
- {
- reg.abs = 1;
- reg.negate = 0;
- return reg;
- }
-
- static INLINE GenReg brw_vec4_indirect(uint32_t subnr, int offset)
- {
- GenReg reg = brw_vec4_grf(0, 0);
- reg.subnr = subnr;
- reg.address_mode = GEN_ADDRESS_REGISTER_INDIRECT_REGISTER;
- reg.dw1.bits.indirect_offset = offset;
- return reg;
- }
-
- static INLINE GenReg brw_vec1_indirect(uint32_t subnr, int offset)
- {
- GenReg reg = brw_vec1_grf(0, 0);
- reg.subnr = subnr;
- reg.address_mode = GEN_ADDRESS_REGISTER_INDIRECT_REGISTER;
- reg.dw1.bits.indirect_offset = offset;
- return reg;
- }
-
- static INLINE bool brw_same_reg(GenReg r1, GenReg r2)
- {
- return r1.file == r2.file && r1.nr == r2.nr;
- }
-
uint32_t brw_swap_cmod(uint32_t cmod);
} /* namespace gbe */
INLINE AddressSpace getAddressSpace(void) const { return addrSpace; }
INLINE bool wellFormed(const Function &fn, std::string &why) const;
INLINE void out(std::ostream &out, const Function &fn) const;
+      /*! True when the dwAligned bit of the instruction is set */
+      INLINE bool isAligned(void) const { return dwAligned != 0; }
Type type; //!< Type to store
Register offset; //!< First source is the offset where to store
Tuple values; //!< Values to load
INLINE AddressSpace getAddressSpace(void) const { return addrSpace; }
INLINE bool wellFormed(const Function &fn, std::string &why) const;
INLINE void out(std::ostream &out, const Function &fn) const;
- Type type; //!< Type to store
- Register offset; //!< First source is the offset where to store
- Tuple values; //!< Values to store
+      /*! True when the dwAligned bit (DWORD aligned access) is set */
+      INLINE bool isAligned(void) const { return dwAligned != 0; }
+ Type type; //!< Type to store
+ Register offset; //!< First source is the offset where to store
+ Tuple values; //!< Values to store
AddressSpace addrSpace; //!< Where to store
- uint8_t valueNum:7; //!< Number of values to store
- uint8_t dwAligned:1; //!< DWORD aligned is what matters with GEN
+ uint8_t valueNum:7; //!< Number of values to store
+ uint8_t dwAligned:1; //!< DWORD aligned is what matters with GEN
};
class ALIGNED_INSTRUCTION TextureInstruction :
out << " %" << this->getDst(fn,0) << " ";
fn.outImmediate(out, immediateIndex);
}
-
} /* namespace internal */
std::ostream &operator<< (std::ostream &out, AddressSpace addrSpace) {
// macro horrors
///////////////////////////////////////////////////////////////////////////
-#define DECL_INSN(OPCODE, CLASS) \
+#define DECL_INSN(OPCODE, CLASS) \
case OP_##OPCODE: return reinterpret_cast<const internal::CLASS*>(this)->CALL;
-#define START_FUNCTION(CLASS, RET, PROTOTYPE) \
- RET CLASS::PROTOTYPE const { \
- const Opcode op = this->getOpcode(); \
+#define START_FUNCTION(CLASS, RET, PROTOTYPE) \
+ RET CLASS::PROTOTYPE const { \
+ const Opcode op = this->getOpcode(); \
switch (op) {
-#define END_FUNCTION(CLASS, RET) \
- }; \
- return RET(); \
+#define END_FUNCTION(CLASS, RET) \
+ }; \
+ return RET(); \
}
#define CALL getSrcNum()
#undef DECL_INSN
-#define DECL_INSN(OPCODE, CLASS) \
- case OP_##OPCODE: \
- { \
- const Function &fn = this->getFunction(); \
- return reinterpret_cast<const internal::CLASS*>(this)->CALL; \
+#define DECL_INSN(OPCODE, CLASS) \
+ case OP_##OPCODE: \
+ { \
+ const Function &fn = this->getFunction(); \
+ return reinterpret_cast<const internal::CLASS*>(this)->CALL; \
}
#define CALL wellFormed(fn, whyNot)
fn.deleteInstruction(other);
}
-#define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \
- RET CLASS::PROTOTYPE const { \
- return reinterpret_cast<const internal::CLASS*>(this)->CALL; \
+#define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \
+ RET CLASS::PROTOTYPE const { \
+ return reinterpret_cast<const internal::CLASS*>(this)->CALL; \
}
DECL_MEM_FN(UnaryInstruction, Type, getType(void), getType())
DECL_MEM_FN(StoreInstruction, Type, getValueType(void), getValueType())
DECL_MEM_FN(StoreInstruction, uint32_t, getValueNum(void), getValueNum())
DECL_MEM_FN(StoreInstruction, AddressSpace, getAddressSpace(void), getAddressSpace())
+DECL_MEM_FN(StoreInstruction, bool, isAligned(void), isAligned())
DECL_MEM_FN(LoadInstruction, Type, getValueType(void), getValueType())
DECL_MEM_FN(LoadInstruction, uint32_t, getValueNum(void), getValueNum())
DECL_MEM_FN(LoadInstruction, AddressSpace, getAddressSpace(void), getAddressSpace())
+DECL_MEM_FN(LoadInstruction, bool, isAligned(void), isAligned())
DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType())
DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated())
///////////////////////////////////////////////////////////////////////////
// All unary functions
-#define DECL_EMIT_FUNCTION(NAME) \
- Instruction NAME(Type type, Register dst, Register src) { \
+#define DECL_EMIT_FUNCTION(NAME) \
+ Instruction NAME(Type type, Register dst, Register src) { \
const internal::UnaryInstruction insn(OP_##NAME, type, dst, src); \
- return insn.convert(); \
+ return insn.convert(); \
}
DECL_EMIT_FUNCTION(MOV)
#undef DECL_EMIT_FUNCTION
// All binary functions
-#define DECL_EMIT_FUNCTION(NAME) \
- Instruction NAME(Type type, Register dst, Register src0, Register src1) { \
+#define DECL_EMIT_FUNCTION(NAME) \
+ Instruction NAME(Type type, Register dst, Register src0, Register src1) { \
const internal::BinaryInstruction insn(OP_##NAME, type, dst, src0, src1); \
- return insn.convert(); \
+ return insn.convert(); \
}
DECL_EMIT_FUNCTION(MUL)
}
// All compare functions
-#define DECL_EMIT_FUNCTION(NAME) \
- Instruction NAME(Type type, Register dst, Register src0, Register src1) { \
- const internal::CompareInstruction insn(OP_##NAME, type, dst, src0, src1);\
- return insn.convert(); \
+#define DECL_EMIT_FUNCTION(NAME) \
+ Instruction NAME(Type type, Register dst, Register src0, Register src1) { \
+ const internal::CompareInstruction insn(OP_##NAME, type, dst, src0, src1); \
+ return insn.convert(); \
}
DECL_EMIT_FUNCTION(EQ)
}
// LOAD and STORE
-#define DECL_EMIT_FUNCTION(NAME, CLASS) \
- Instruction NAME(Type type, \
- Tuple tuple, \
- Register offset, \
- AddressSpace space, \
- uint32_t valueNum, \
- bool dwAligned) \
- { \
+#define DECL_EMIT_FUNCTION(NAME, CLASS) \
+ Instruction NAME(Type type, \
+ Tuple tuple, \
+ Register offset, \
+ AddressSpace space, \
+ uint32_t valueNum, \
+ bool dwAligned) \
+ { \
const internal::CLASS insn(type,tuple,offset,space,valueNum,dwAligned); \
- return insn.convert(); \
+ return insn.convert(); \
}
DECL_EMIT_FUNCTION(LOAD, LoadInstruction)
std::ostream &operator<< (std::ostream &out, const Instruction &insn) {
const Function &fn = insn.getFunction();
switch (insn.getOpcode()) {
-#define DECL_INSN(OPCODE, CLASS) \
- case OP_##OPCODE: \
+#define DECL_INSN(OPCODE, CLASS) \
+ case OP_##OPCODE: \
reinterpret_cast<const internal::CLASS&>(insn).out(out, fn); \
break;
#include "instruction.hxx"
/*! Address space that is manipulated here */
AddressSpace getAddressSpace(void) const;
/*! DWORD aligned means untyped read for Gen. That is what matters */
- bool isDWORDAligned(void) const;
+ bool isAligned(void) const;
+ /*! Return the register that contains the addresses */
+ INLINE Register getAddress(void) const { return this->getSrc(0u); }
+ /*! Return the register that contains value valueID */
+ INLINE Register getValue(uint32_t valueID) const {
+ GBE_ASSERT(valueID < this->getValueNum());
+ return this->getSrc(valueID + 1u);
+ }
/*! Return true if the given instruction is an instance of this class */
static bool isClassOf(const Instruction &insn);
};
/*! Address space that is manipulated here */
AddressSpace getAddressSpace(void) const;
/*! DWORD aligned means untyped read for Gen. That is what matters */
- bool isDWORDAligned(void) const;
+ bool isAligned(void) const;
+ /*! Return the register that contains the addresses */
+ INLINE Register getAddress(void) const { return this->getSrc(0u); }
+ /*! Return the register that contains value valueID */
+ INLINE Register getValue(uint32_t valueID) const {
+   // valueID is used directly as a destination index, so reject anything
+   // at or beyond getValueNum() (same check as StoreInstruction::getValue)
+   GBE_ASSERT(valueID < this->getValueNum());
+   return this->getDst(valueID);
+ }
/*! Return true if the given instruction is an instance of this class */
static bool isClassOf(const Instruction &insn);
};