backend/gen_program.hpp
backend/gen_program.h
backend/gen_defs.hpp
+ backend/gen_insn_compact.cpp
backend/gen_encoder.hpp
backend/gen_encoder.cpp)
return err;
}
-static int dest (FILE *file, const struct GenInstruction *inst)
+static int dest (FILE *file, const union GenNativeInstruction *inst)
{
int err = 0;
return 0;
}
-static int dest_3src (FILE *file, const struct GenInstruction *inst)
+static int dest_3src (FILE *file, const union GenNativeInstruction *inst)
{
int err = 0;
const uint32_t reg_file = GEN_GENERAL_REGISTER_FILE;
return err;
}
-static int src0_3src (FILE *file, const struct GenInstruction *inst)
+static int src0_3src (FILE *file, const union GenNativeInstruction *inst)
{
int err = 0;
uint32_t swz_x = (inst->bits2.da3src.src0_swizzle >> 0) & 0x3;
return err;
}
-static int src1_3src (FILE *file, const struct GenInstruction *inst)
+static int src1_3src (FILE *file, const union GenNativeInstruction *inst)
{
int err = 0;
uint32_t swz_x = (inst->bits2.da3src.src1_swizzle >> 0) & 0x3;
}
-static int src2_3src (FILE *file, const struct GenInstruction *inst)
+static int src2_3src (FILE *file, const union GenNativeInstruction *inst)
{
int err = 0;
uint32_t swz_x = (inst->bits3.da3src.src2_swizzle >> 0) & 0x3;
return err;
}
-static int imm (FILE *file, uint32_t type, const struct GenInstruction *inst) {
+static int imm (FILE *file, uint32_t type, const union GenNativeInstruction *inst) {
switch (type) {
case GEN_TYPE_UD:
format (file, "0x%xUD", inst->bits3.ud);
return 0;
}
-static int src0 (FILE *file, const struct GenInstruction *inst)
+static int src0 (FILE *file, const union GenNativeInstruction *inst)
{
if (inst->bits1.da1.src0_reg_file == GEN_IMMEDIATE_VALUE)
return imm (file, inst->bits1.da1.src0_reg_type,
}
}
-static int src1 (FILE *file, const struct GenInstruction *inst)
+static int src1 (FILE *file, const union GenNativeInstruction *inst)
{
if (inst->bits1.da1.src1_reg_file == GEN_IMMEDIATE_VALUE)
return imm (file, inst->bits1.da1.src1_reg_type,
[5] = 32,
};
-static int qtr_ctrl(FILE *file, const struct GenInstruction *inst)
+static int qtr_ctrl(FILE *file, const union GenNativeInstruction *inst)
{
int qtr_ctl = inst->header.quarter_control;
int exec_size = esize[inst->header.execution_size];
int gen_disasm (FILE *file, const void *opaque_insn)
{
- const struct GenInstruction *inst = (const struct GenInstruction *) opaque_insn;
+ const union GenNativeInstruction *inst = (const union GenNativeInstruction *) opaque_insn;
int err = 0;
int space = 0;
int gen = 7;
namespace gbe
{
+ extern void decompactInstruction(union GenCompactInstruction *p, union GenNativeInstruction *pOut);
///////////////////////////////////////////////////////////////////////////
// GenContext implementation
///////////////////////////////////////////////////////////////////////////
const LabelIndex label = pair.first;
const int32_t insnID = pair.second;
const int32_t targetID = labelPos.find(label)->second;
- p->patchJMPI(insnID, (targetID - insnID) * 2);
+ p->patchJMPI(insnID, (targetID - insnID));
}
for (auto pair : branchPos3) {
const LabelPair labelPair = pair.first;
const int32_t insnID = pair.second;
- const int32_t jip = labelPos.find(labelPair.l0)->second + labelPair.offset0;
- const int32_t uip = labelPos.find(labelPair.l1)->second + labelPair.offset1;
- assert((jip - insnID) * 2 < 32767 && (jip - insnID) * 2 > -32768);
- assert((uip - insnID) * 2 < 32767 && (uip - insnID) * 2 > -32768);
- p->patchJMPI(insnID, (((uip - insnID) * 2) << 16) | ((jip - insnID) * 2));
+ // FIXME: the 'labelPair' implementation must be fixed, as it is hard to
+ // convert an InstructionSelection offset to an ASM offset since the asm may be compacted
+ const int32_t jip = labelPos.find(labelPair.l0)->second + labelPair.offset0*2;
+ const int32_t uip = labelPos.find(labelPair.l1)->second + labelPair.offset1*2;
+ assert((jip - insnID) < 32767 && (jip - insnID) > -32768);
+ assert((uip - insnID) < 32767 && (uip - insnID) > -32768);
+ p->patchJMPI(insnID, (((uip - insnID)) << 16) | ((jip - insnID)));
}
}
p->SHL(high, low, tmp);
p->MOV(low, GenRegister::immud(0));
- p->patchJMPI(jip1, (p->n_instruction() - jip1) * 2);
+ p->patchJMPI(jip1, (p->n_instruction() - jip1) );
p->curr.predicate = GEN_PREDICATE_NONE;
p->CMP(GEN_CONDITIONAL_LE, exp, GenRegister::immud(31)); //update dst where high != 0
p->curr.predicate = GEN_PREDICATE_NORMAL;
p->CMP(GEN_CONDITIONAL_EQ, high, GenRegister::immud(0x80000000));
p->CMP(GEN_CONDITIONAL_EQ, low, GenRegister::immud(0x0));
p->AND(dst_ud, dst_ud, GenRegister::immud(0xfffffffe));
- p->patchJMPI(jip0, (p->n_instruction() - jip0) * 2);
+ p->patchJMPI(jip0, (p->n_instruction() - jip0));
p->pop();
GenRegister zero = GenRegister::immud(0),
one = GenRegister::immud(1),
imm31 = GenRegister::immud(31);
+ uint32_t jip0;
// (a,b) <- x
loadTopHalf(a, x);
loadBottomHalf(b, x);
p->curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
else
NOT_IMPLEMENTED;
- int jip = -(int)(p->n_instruction() - loop_start + 1) * 2;
+ int distance = -(int)(p->n_instruction() - loop_start );
p->curr.noMask = 1;
+ jip0 = p->n_instruction();
p->JMPI(zero);
- p->patchJMPI(p->n_instruction() - 1, jip + 2);
+ p->patchJMPI(jip0, distance);
p->pop();
// end of loop
}
if (OCL_OUTPUT_ASM) {
std::cout << genKernel->getName() << "'s disassemble begin:" << std::endl;
ir::LabelIndex curLabel = (ir::LabelIndex)0;
+ GenCompactInstruction * pCom = NULL;
+ GenNativeInstruction insn;
std::cout << " L0:" << std::endl;
- for (uint32_t insnID = 0; insnID < genKernel->insnNum; ++insnID) {
+ for (uint32_t insnID = 0; insnID < genKernel->insnNum; ) {
if (labelPos.find((ir::LabelIndex)(curLabel + 1))->second == insnID) {
std::cout << " L" << curLabel + 1 << ":" << std::endl;
curLabel = (ir::LabelIndex)(curLabel + 1);
}
- std::cout << " (" << std::setw(8) << insnID * 2 << ") ";
- gen_disasm(stdout, &p->store[insnID]);
+ std::cout << " (" << std::setw(8) << insnID << ") ";
+ pCom = (GenCompactInstruction*)&p->store[insnID];
+ if(pCom->bits1.cmpt_control == 1) {
+ decompactInstruction(pCom, &insn);
+ gen_disasm(stdout, &insn);
+ insnID++;
+ } else {
+ gen_disasm(stdout, &p->store[insnID]);
+ insnID = insnID + 2;
+ }
}
std::cout << genKernel->getName() << "'s disassemble end." << std::endl;
}
#define GEN_MAX_GRF 128
/* Instruction format for the execution units */
-struct GenInstruction
-{
- struct {
- uint32_t opcode:7;
- uint32_t pad:1;
- uint32_t access_mode:1;
- uint32_t mask_control:1;
- uint32_t dependency_control:2;
- uint32_t quarter_control:2;
- uint32_t thread_control:2;
- uint32_t predicate_control:4;
- uint32_t predicate_inverse:1;
- uint32_t execution_size:3;
- uint32_t destreg_or_condmod:4;
- uint32_t acc_wr_control:1;
- uint32_t cmpt_control:1;
- uint32_t debug_control:1;
- uint32_t saturate:1;
- } header;
-
- union {
- struct {
- uint32_t dest_reg_file:2;
- uint32_t dest_reg_type:3;
- uint32_t src0_reg_file:2;
- uint32_t src0_reg_type:3;
- uint32_t src1_reg_file:2;
- uint32_t src1_reg_type:3;
- uint32_t nib_ctrl:1;
- uint32_t dest_subreg_nr:5;
- uint32_t dest_reg_nr:8;
- uint32_t dest_horiz_stride:2;
- uint32_t dest_address_mode:1;
- } da1;
-
- struct {
- uint32_t dest_reg_file:2;
- uint32_t dest_reg_type:3;
- uint32_t src0_reg_file:2;
- uint32_t src0_reg_type:3;
- uint32_t src1_reg_file:2; /* 0x00000c00 */
- uint32_t src1_reg_type:3; /* 0x00007000 */
- uint32_t nib_ctrl:1;
- int dest_indirect_offset:10; /* offset against the deref'd address reg */
- uint32_t dest_subreg_nr:3; /* subnr for the address reg a0.x */
- uint32_t dest_horiz_stride:2;
- uint32_t dest_address_mode:1;
- } ia1;
-
- struct {
- uint32_t dest_reg_file:2;
- uint32_t dest_reg_type:3;
- uint32_t src0_reg_file:2;
- uint32_t src0_reg_type:3;
- uint32_t src1_reg_file:2;
- uint32_t src1_reg_type:3;
- uint32_t nib_ctrl:1;
- uint32_t dest_writemask:4;
- uint32_t dest_subreg_nr:1;
- uint32_t dest_reg_nr:8;
- uint32_t dest_horiz_stride:2;
- uint32_t dest_address_mode:1;
- } da16;
- struct {
- uint32_t dest_reg_file:2;
- uint32_t dest_reg_type:3;
- uint32_t src0_reg_file:2;
- uint32_t src0_reg_type:3;
- uint32_t nib_ctrl:1;
- uint32_t dest_writemask:4;
- int dest_indirect_offset:6;
- uint32_t dest_subreg_nr:3;
- uint32_t dest_horiz_stride:2;
- uint32_t dest_address_mode:1;
- } ia16;
+struct GenInstruction {
+ uint32_t low;
+ uint32_t high;
+};
+union GenCompactInstruction {
+ struct GenInstruction low;
+ struct {
struct {
- uint32_t dest_reg_file:2;
- uint32_t dest_reg_type:3;
- uint32_t src0_reg_file:2;
- uint32_t src0_reg_type:3;
- uint32_t src1_reg_file:2;
- uint32_t src1_reg_type:3;
+ uint32_t opcode:7;
+ uint32_t debug_control:1;
+ uint32_t control_index:5;
+ uint32_t data_type_index:5;
+ uint32_t sub_reg_index:5;
+ uint32_t acc_wr_control:1;
+ uint32_t destreg_or_condmod:4;
uint32_t pad:1;
- int jump_count:16;
- } branch_gen6;
-
+ uint32_t cmpt_control:1;
+ uint32_t src0_index_lo:2;
+ } bits1;
struct {
- uint32_t dest_reg_file:1;
- uint32_t flag_subreg_num:1;
- uint32_t pad0:2;
- uint32_t src0_abs:1;
- uint32_t src0_negate:1;
- uint32_t src1_abs:1;
- uint32_t src1_negate:1;
- uint32_t src2_abs:1;
- uint32_t src2_negate:1;
- uint32_t pad1:7;
- uint32_t dest_writemask:4;
- uint32_t dest_subreg_nr:3;
+ uint32_t src0_index_hi:3;
+ uint32_t src1_index:5;
uint32_t dest_reg_nr:8;
- } da3src;
- } bits1;
-
- union {
- struct {
- uint32_t src0_subreg_nr:5;
uint32_t src0_reg_nr:8;
- uint32_t src0_abs:1;
- uint32_t src0_negate:1;
- uint32_t src0_address_mode:1;
- uint32_t src0_horiz_stride:2;
- uint32_t src0_width:3;
- uint32_t src0_vert_stride:4;
- uint32_t flag_sub_reg_nr:1;
- uint32_t flag_reg_nr:1;
- uint32_t pad:5;
- } da1;
-
- struct {
- int src0_indirect_offset:10;
- uint32_t src0_subreg_nr:3;
- uint32_t src0_abs:1;
- uint32_t src0_negate:1;
- uint32_t src0_address_mode:1;
- uint32_t src0_horiz_stride:2;
- uint32_t src0_width:3;
- uint32_t src0_vert_stride:4;
- uint32_t flag_sub_reg_nr:1;
- uint32_t flag_reg_nr:1;
- uint32_t pad:5;
- } ia1;
-
- struct {
- uint32_t src0_swz_x:2;
- uint32_t src0_swz_y:2;
- uint32_t src0_subreg_nr:1;
- uint32_t src0_reg_nr:8;
- uint32_t src0_abs:1;
- uint32_t src0_negate:1;
- uint32_t src0_address_mode:1;
- uint32_t src0_swz_z:2;
- uint32_t src0_swz_w:2;
- uint32_t pad0:1;
- uint32_t src0_vert_stride:4;
- uint32_t flag_sub_reg_nr:1;
- uint32_t flag_reg_nr:1;
- uint32_t pad:5;
- } da16;
-
- struct {
- uint32_t src0_swz_x:2;
- uint32_t src0_swz_y:2;
- int src0_indirect_offset:6;
- uint32_t src0_subreg_nr:3;
- uint32_t src0_abs:1;
- uint32_t src0_negate:1;
- uint32_t src0_address_mode:1;
- uint32_t src0_swz_z:2;
- uint32_t src0_swz_w:2;
- uint32_t pad0:1;
- uint32_t src0_vert_stride:4;
- uint32_t flag_sub_reg_nr:1;
- uint32_t flag_reg_nr:1;
- uint32_t pad:5;
- } ia16;
-
- struct {
- uint32_t src0_rep_ctrl:1;
- uint32_t src0_swizzle:8;
- uint32_t src0_subreg_nr:3;
- uint32_t src0_reg_nr:8;
- uint32_t pad0:1;
- uint32_t src1_rep_ctrl:1;
- uint32_t src1_swizzle:8;
- uint32_t src1_subreg_nr_low:2;
- } da3src;
- } bits2;
-
- union {
- struct {
- uint32_t src1_subreg_nr:5;
uint32_t src1_reg_nr:8;
- uint32_t src1_abs:1;
- uint32_t src1_negate:1;
- uint32_t src1_address_mode:1;
- uint32_t src1_horiz_stride:2;
- uint32_t src1_width:3;
- uint32_t src1_vert_stride:4;
- uint32_t pad0:7;
- } da1;
-
- struct {
- uint32_t src1_swz_x:2;
- uint32_t src1_swz_y:2;
- uint32_t src1_subreg_nr:1;
- uint32_t src1_reg_nr:8;
- uint32_t src1_abs:1;
- uint32_t src1_negate:1;
- uint32_t src1_address_mode:1;
- uint32_t src1_swz_z:2;
- uint32_t src1_swz_w:2;
- uint32_t pad1:1;
- uint32_t src1_vert_stride:4;
- uint32_t pad2:7;
- } da16;
-
- struct {
- int src1_indirect_offset:10;
- uint32_t src1_subreg_nr:3;
- uint32_t src1_abs:1;
- uint32_t src1_negate:1;
- uint32_t src1_address_mode:1;
- uint32_t src1_horiz_stride:2;
- uint32_t src1_width:3;
- uint32_t src1_vert_stride:4;
- uint32_t pad1:7;
- } ia1;
-
- struct {
- uint32_t src1_swz_x:2;
- uint32_t src1_swz_y:2;
- int src1_indirect_offset:6;
- uint32_t src1_subreg_nr:3;
- uint32_t src1_abs:1;
- uint32_t src1_negate:1;
- uint32_t pad0:1;
- uint32_t src1_swz_z:2;
- uint32_t src1_swz_w:2;
- uint32_t pad1:1;
- uint32_t src1_vert_stride:4;
- uint32_t pad2:7;
- } ia16;
-
- struct {
- uint32_t function_control:19;
- uint32_t header_present:1;
- uint32_t response_length:5;
- uint32_t msg_length:4;
- uint32_t pad1:2;
- uint32_t end_of_thread:1;
- } generic_gen5;
-
- struct {
- uint32_t sub_function_id:3;
- uint32_t pad0:11;
- uint32_t ack_req:1;
- uint32_t notify:2;
- uint32_t pad1:2;
- uint32_t header:1;
- uint32_t response_length:5;
- uint32_t msg_length:4;
- uint32_t pad2:2;
- uint32_t end_of_thread:1;
- } msg_gateway;
-
- struct {
- uint32_t opcode:1;
- uint32_t request:1;
- uint32_t pad0:2;
- uint32_t resource:1;
- uint32_t pad1:14;
- uint32_t header:1;
- uint32_t response_length:5;
- uint32_t msg_length:4;
- uint32_t pad2:2;
- uint32_t end_of_thread:1;
- } spawner_gen5;
-
- /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
- struct {
- uint32_t function:4;
- uint32_t int_type:1;
- uint32_t precision:1;
- uint32_t saturate:1;
- uint32_t data_type:1;
- uint32_t snapshot:1;
- uint32_t pad0:10;
- uint32_t header_present:1;
- uint32_t response_length:5;
- uint32_t msg_length:4;
- uint32_t pad1:2;
- uint32_t end_of_thread:1;
- } math_gen5;
+ } bits2;
+ };
+};
+union GenNativeInstruction
+{
+ struct {
+ struct GenInstruction low;
+ struct GenInstruction high;
+ };
+ struct {
struct {
- uint32_t bti:8;
- uint32_t sampler:4;
- uint32_t msg_type:5;
- uint32_t simd_mode:2;
- uint32_t header_present:1;
- uint32_t response_length:5;
- uint32_t msg_length:4;
- uint32_t pad1:2;
- uint32_t end_of_thread:1;
- } sampler_gen7;
-
- /**
- * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
- *
- * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
- **/
- struct {
- uint32_t bti:8;
- uint32_t msg_control:5;
- uint32_t msg_type:3;
- uint32_t pad0:3;
- uint32_t header_present:1;
- uint32_t response_length:5;
- uint32_t msg_length:4;
- uint32_t pad1:2;
- uint32_t end_of_thread:1;
- } gen6_dp_sampler_const_cache;
-
- /*! Data port untyped read / write messages */
- struct {
- uint32_t bti:8;
- uint32_t rgba:4;
- uint32_t simd_mode:2;
- uint32_t msg_type:4;
- uint32_t category:1;
- uint32_t header_present:1;
- uint32_t response_length:5;
- uint32_t msg_length:4;
- uint32_t pad2:2;
- uint32_t end_of_thread:1;
- } gen7_untyped_rw;
-
- /*! Data port byte scatter / gather */
- struct {
- uint32_t bti:8;
- uint32_t simd_mode:1;
- uint32_t ignored0:1;
- uint32_t data_size:2;
- uint32_t ignored1:2;
- uint32_t msg_type:4;
- uint32_t category:1;
- uint32_t header_present:1;
- uint32_t response_length:5;
- uint32_t msg_length:4;
- uint32_t pad2:2;
- uint32_t end_of_thread:1;
- } gen7_byte_rw;
-
- /*! Data port Scratch Read/ write */
- struct {
- uint32_t offset:12;
- uint32_t block_size:2;
- uint32_t ignored0:1;
- uint32_t invalidate_after_read:1;
- uint32_t channel_mode:1;
- uint32_t msg_type:1;
- uint32_t category:1;
- uint32_t header_present:1;
- uint32_t response_length:5;
- uint32_t msg_length:4;
- uint32_t pad2:2;
- uint32_t end_of_thread:1;
- } gen7_scratch_rw;
-
- /*! Data port OBlock read / write */
- struct {
- uint32_t bti:8;
- uint32_t block_size:3;
- uint32_t ignored:2;
- uint32_t invalidate_after_read:1;
- uint32_t msg_type:4;
- uint32_t category:1;
- uint32_t header_present:1;
- uint32_t response_length:5;
- uint32_t msg_length:4;
- uint32_t pad2:2;
- uint32_t end_of_thread:1;
- } gen7_oblock_rw;
-
- /*! Data port dword scatter / gather */
- struct {
- uint32_t bti:8;
- uint32_t block_size:2;
- uint32_t ignored0:3;
- uint32_t invalidate_after_read:1;
- uint32_t msg_type:4;
- uint32_t ignored1:1;
- uint32_t header_present:1;
- uint32_t response_length:5;
- uint32_t msg_length:4;
- uint32_t pad2:2;
- uint32_t end_of_thread:1;
- } gen7_dword_rw;
-
- /*! Data port typed read / write messages */
- struct {
- uint32_t bti:8;
- uint32_t chan_mask:4;
+ uint32_t opcode:7;
uint32_t pad:1;
- uint32_t slot:1;
- uint32_t msg_type:4;
- uint32_t pad2:1;
- uint32_t header_present:1;
- uint32_t response_length:5;
- uint32_t msg_length:4;
- uint32_t pad3:2;
- uint32_t end_of_thread:1;
- } gen7_typed_rw;
-
- /*! Memory fence */
- struct {
- uint32_t bti:8;
- uint32_t pad:5;
- uint32_t commit_enable:1;
- uint32_t msg_type:4;
- uint32_t pad2:1;
- uint32_t header_present:1;
- uint32_t response_length:5;
- uint32_t msg_length:4;
- uint32_t pad3:2;
- uint32_t end_of_thread:1;
- } gen7_memory_fence;
-
- /*! atomic messages */
- struct {
- uint32_t bti:8;
- uint32_t aop_type:4;
- uint32_t simd_mode:1;
- uint32_t return_data:1;
- uint32_t msg_type:4;
- uint32_t category:1;
- uint32_t header_present:1;
- uint32_t response_length:5;
- uint32_t msg_length:4;
- uint32_t pad3:2;
- uint32_t end_of_thread:1;
- } gen7_atomic_op;
-
- struct {
- uint32_t src1_subreg_nr_high:1;
- uint32_t src1_reg_nr:8;
- uint32_t pad0:1;
- uint32_t src2_rep_ctrl:1;
- uint32_t src2_swizzle:8;
- uint32_t src2_subreg_nr:3;
- uint32_t src2_reg_nr:8;
- uint32_t pad1:2;
- } da3src;
-
- /*! Message gateway */
- struct {
- uint32_t subfunc:3;
- uint32_t pad:11;
- uint32_t ackreq:1;
- uint32_t notify:2;
- uint32_t pad2:2;
- uint32_t header_present:1;
- uint32_t response_length:5;
- uint32_t msg_length:4;
- uint32_t pad3:2;
- uint32_t end_of_thread:1;
- } gen7_msg_gw;
-
- struct {
- uint32_t jip:16;
- uint32_t uip:16;
- } gen7_branch;
-
- int d;
- uint32_t ud;
- float f;
- } bits3;
+ uint32_t access_mode:1;
+ uint32_t mask_control:1;
+ uint32_t dependency_control:2;
+ uint32_t quarter_control:2;
+ uint32_t thread_control:2;
+ uint32_t predicate_control:4;
+ uint32_t predicate_inverse:1;
+ uint32_t execution_size:3;
+ uint32_t destreg_or_condmod:4;
+ uint32_t acc_wr_control:1;
+ uint32_t cmpt_control:1;
+ uint32_t debug_control:1;
+ uint32_t saturate:1;
+ } header;
+
+ union {
+ struct {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ uint32_t src1_reg_file:2;
+ uint32_t src1_reg_type:3;
+ uint32_t nib_ctrl:1;
+ uint32_t dest_subreg_nr:5;
+ uint32_t dest_reg_nr:8;
+ uint32_t dest_horiz_stride:2;
+ uint32_t dest_address_mode:1;
+ } da1;
+
+ struct {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ uint32_t src1_reg_file:2; /* 0x00000c00 */
+ uint32_t src1_reg_type:3; /* 0x00007000 */
+ uint32_t nib_ctrl:1;
+ int dest_indirect_offset:10; /* offset against the deref'd address reg */
+ uint32_t dest_subreg_nr:3; /* subnr for the address reg a0.x */
+ uint32_t dest_horiz_stride:2;
+ uint32_t dest_address_mode:1;
+ } ia1;
+
+ struct {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ uint32_t src1_reg_file:2;
+ uint32_t src1_reg_type:3;
+ uint32_t nib_ctrl:1;
+ uint32_t dest_writemask:4;
+ uint32_t dest_subreg_nr:1;
+ uint32_t dest_reg_nr:8;
+ uint32_t dest_horiz_stride:2;
+ uint32_t dest_address_mode:1;
+ } da16;
+
+ struct {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ uint32_t nib_ctrl:1;
+ uint32_t dest_writemask:4;
+ int dest_indirect_offset:6;
+ uint32_t dest_subreg_nr:3;
+ uint32_t dest_horiz_stride:2;
+ uint32_t dest_address_mode:1;
+ } ia16;
+
+ struct {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ uint32_t src1_reg_file:2;
+ uint32_t src1_reg_type:3;
+ uint32_t pad:1;
+ int jump_count:16;
+ } branch_gen6;
+
+ struct {
+ uint32_t dest_reg_file:1;
+ uint32_t flag_subreg_num:1;
+ uint32_t pad0:2;
+ uint32_t src0_abs:1;
+ uint32_t src0_negate:1;
+ uint32_t src1_abs:1;
+ uint32_t src1_negate:1;
+ uint32_t src2_abs:1;
+ uint32_t src2_negate:1;
+ uint32_t pad1:7;
+ uint32_t dest_writemask:4;
+ uint32_t dest_subreg_nr:3;
+ uint32_t dest_reg_nr:8;
+ } da3src;
+ } bits1;
+
+ union {
+ struct {
+ uint32_t src0_subreg_nr:5;
+ uint32_t src0_reg_nr:8;
+ uint32_t src0_abs:1;
+ uint32_t src0_negate:1;
+ uint32_t src0_address_mode:1;
+ uint32_t src0_horiz_stride:2;
+ uint32_t src0_width:3;
+ uint32_t src0_vert_stride:4;
+ uint32_t flag_sub_reg_nr:1;
+ uint32_t flag_reg_nr:1;
+ uint32_t pad:5;
+ } da1;
+
+ struct {
+ int src0_indirect_offset:10;
+ uint32_t src0_subreg_nr:3;
+ uint32_t src0_abs:1;
+ uint32_t src0_negate:1;
+ uint32_t src0_address_mode:1;
+ uint32_t src0_horiz_stride:2;
+ uint32_t src0_width:3;
+ uint32_t src0_vert_stride:4;
+ uint32_t flag_sub_reg_nr:1;
+ uint32_t flag_reg_nr:1;
+ uint32_t pad:5;
+ } ia1;
+
+ struct {
+ uint32_t src0_swz_x:2;
+ uint32_t src0_swz_y:2;
+ uint32_t src0_subreg_nr:1;
+ uint32_t src0_reg_nr:8;
+ uint32_t src0_abs:1;
+ uint32_t src0_negate:1;
+ uint32_t src0_address_mode:1;
+ uint32_t src0_swz_z:2;
+ uint32_t src0_swz_w:2;
+ uint32_t pad0:1;
+ uint32_t src0_vert_stride:4;
+ uint32_t flag_sub_reg_nr:1;
+ uint32_t flag_reg_nr:1;
+ uint32_t pad:5;
+ } da16;
+
+ struct {
+ uint32_t src0_swz_x:2;
+ uint32_t src0_swz_y:2;
+ int src0_indirect_offset:6;
+ uint32_t src0_subreg_nr:3;
+ uint32_t src0_abs:1;
+ uint32_t src0_negate:1;
+ uint32_t src0_address_mode:1;
+ uint32_t src0_swz_z:2;
+ uint32_t src0_swz_w:2;
+ uint32_t pad0:1;
+ uint32_t src0_vert_stride:4;
+ uint32_t flag_sub_reg_nr:1;
+ uint32_t flag_reg_nr:1;
+ uint32_t pad:5;
+ } ia16;
+
+ struct {
+ uint32_t src0_rep_ctrl:1;
+ uint32_t src0_swizzle:8;
+ uint32_t src0_subreg_nr:3;
+ uint32_t src0_reg_nr:8;
+ uint32_t pad0:1;
+ uint32_t src1_rep_ctrl:1;
+ uint32_t src1_swizzle:8;
+ uint32_t src1_subreg_nr_low:2;
+ } da3src;
+ } bits2;
+
+ union {
+ struct {
+ uint32_t src1_subreg_nr:5;
+ uint32_t src1_reg_nr:8;
+ uint32_t src1_abs:1;
+ uint32_t src1_negate:1;
+ uint32_t src1_address_mode:1;
+ uint32_t src1_horiz_stride:2;
+ uint32_t src1_width:3;
+ uint32_t src1_vert_stride:4;
+ uint32_t pad0:7;
+ } da1;
+
+ struct {
+ uint32_t src1_swz_x:2;
+ uint32_t src1_swz_y:2;
+ uint32_t src1_subreg_nr:1;
+ uint32_t src1_reg_nr:8;
+ uint32_t src1_abs:1;
+ uint32_t src1_negate:1;
+ uint32_t src1_address_mode:1;
+ uint32_t src1_swz_z:2;
+ uint32_t src1_swz_w:2;
+ uint32_t pad1:1;
+ uint32_t src1_vert_stride:4;
+ uint32_t pad2:7;
+ } da16;
+
+ struct {
+ int src1_indirect_offset:10;
+ uint32_t src1_subreg_nr:3;
+ uint32_t src1_abs:1;
+ uint32_t src1_negate:1;
+ uint32_t src1_address_mode:1;
+ uint32_t src1_horiz_stride:2;
+ uint32_t src1_width:3;
+ uint32_t src1_vert_stride:4;
+ uint32_t pad1:7;
+ } ia1;
+
+ struct {
+ uint32_t src1_swz_x:2;
+ uint32_t src1_swz_y:2;
+ int src1_indirect_offset:6;
+ uint32_t src1_subreg_nr:3;
+ uint32_t src1_abs:1;
+ uint32_t src1_negate:1;
+ uint32_t pad0:1;
+ uint32_t src1_swz_z:2;
+ uint32_t src1_swz_w:2;
+ uint32_t pad1:1;
+ uint32_t src1_vert_stride:4;
+ uint32_t pad2:7;
+ } ia16;
+
+ struct {
+ uint32_t function_control:19;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } generic_gen5;
+
+ struct {
+ uint32_t sub_function_id:3;
+ uint32_t pad0:11;
+ uint32_t ack_req:1;
+ uint32_t notify:2;
+ uint32_t pad1:2;
+ uint32_t header:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } msg_gateway;
+
+ struct {
+ uint32_t opcode:1;
+ uint32_t request:1;
+ uint32_t pad0:2;
+ uint32_t resource:1;
+ uint32_t pad1:14;
+ uint32_t header:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } spawner_gen5;
+
+ /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
+ struct {
+ uint32_t function:4;
+ uint32_t int_type:1;
+ uint32_t precision:1;
+ uint32_t saturate:1;
+ uint32_t data_type:1;
+ uint32_t snapshot:1;
+ uint32_t pad0:10;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } math_gen5;
+
+ struct {
+ uint32_t bti:8;
+ uint32_t sampler:4;
+ uint32_t msg_type:5;
+ uint32_t simd_mode:2;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } sampler_gen7;
+
+ /**
+ * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
+ *
+ * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
+ **/
+ struct {
+ uint32_t bti:8;
+ uint32_t msg_control:5;
+ uint32_t msg_type:3;
+ uint32_t pad0:3;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } gen6_dp_sampler_const_cache;
+
+ /*! Data port untyped read / write messages */
+ struct {
+ uint32_t bti:8;
+ uint32_t rgba:4;
+ uint32_t simd_mode:2;
+ uint32_t msg_type:4;
+ uint32_t category:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } gen7_untyped_rw;
+
+ /*! Data port byte scatter / gather */
+ struct {
+ uint32_t bti:8;
+ uint32_t simd_mode:1;
+ uint32_t ignored0:1;
+ uint32_t data_size:2;
+ uint32_t ignored1:2;
+ uint32_t msg_type:4;
+ uint32_t category:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } gen7_byte_rw;
+
+ /*! Data port scratch read / write */
+ struct {
+ uint32_t offset:12;
+ uint32_t block_size:2;
+ uint32_t ignored0:1;
+ uint32_t invalidate_after_read:1;
+ uint32_t channel_mode:1;
+ uint32_t msg_type:1;
+ uint32_t category:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } gen7_scratch_rw;
+
+ /*! Data port OBlock read / write */
+ struct {
+ uint32_t bti:8;
+ uint32_t block_size:3;
+ uint32_t ignored:2;
+ uint32_t invalidate_after_read:1;
+ uint32_t msg_type:4;
+ uint32_t category:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } gen7_oblock_rw;
+
+ /*! Data port dword scatter / gather */
+ struct {
+ uint32_t bti:8;
+ uint32_t block_size:2;
+ uint32_t ignored0:3;
+ uint32_t invalidate_after_read:1;
+ uint32_t msg_type:4;
+ uint32_t ignored1:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } gen7_dword_rw;
+
+ /*! Data port typed read / write messages */
+ struct {
+ uint32_t bti:8;
+ uint32_t chan_mask:4;
+ uint32_t pad:1;
+ uint32_t slot:1;
+ uint32_t msg_type:4;
+ uint32_t pad2:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad3:2;
+ uint32_t end_of_thread:1;
+ } gen7_typed_rw;
+
+ /*! Memory fence */
+ struct {
+ uint32_t bti:8;
+ uint32_t pad:5;
+ uint32_t commit_enable:1;
+ uint32_t msg_type:4;
+ uint32_t pad2:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad3:2;
+ uint32_t end_of_thread:1;
+ } gen7_memory_fence;
+
+ /*! atomic messages */
+ struct {
+ uint32_t bti:8;
+ uint32_t aop_type:4;
+ uint32_t simd_mode:1;
+ uint32_t return_data:1;
+ uint32_t msg_type:4;
+ uint32_t category:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad3:2;
+ uint32_t end_of_thread:1;
+ } gen7_atomic_op;
+
+ struct {
+ uint32_t src1_subreg_nr_high:1;
+ uint32_t src1_reg_nr:8;
+ uint32_t pad0:1;
+ uint32_t src2_rep_ctrl:1;
+ uint32_t src2_swizzle:8;
+ uint32_t src2_subreg_nr:3;
+ uint32_t src2_reg_nr:8;
+ uint32_t pad1:2;
+ } da3src;
+
+ /*! Message gateway */
+ struct {
+ uint32_t subfunc:3;
+ uint32_t pad:11;
+ uint32_t ackreq:1;
+ uint32_t notify:2;
+ uint32_t pad2:2;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad3:2;
+ uint32_t end_of_thread:1;
+ } gen7_msg_gw;
+
+ struct {
+ uint32_t jip:16;
+ uint32_t uip:16;
+ } gen7_branch;
+
+ int d;
+ uint32_t ud;
+ float f;
+ } bits3;
+ };
};
#endif /* __GEN_DEFS_HPP__ */
#include "backend/gen_encoder.hpp"
#include <cstring>
+
namespace gbe
{
+ extern bool compactAlu2(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1, uint32_t condition, bool split);
+ extern bool compactAlu1(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src, uint32_t condition, bool split);
//////////////////////////////////////////////////////////////////////////
// Some helper functions to encode
//////////////////////////////////////////////////////////////////////////
}
static void setMessageDescriptor(GenEncoder *p,
- GenInstruction *inst,
+ GenNativeInstruction *inst,
enum GenMessageTarget sfid,
unsigned msg_length,
unsigned response_length,
}
static void setDPUntypedRW(GenEncoder *p,
- GenInstruction *insn,
+ GenNativeInstruction *insn,
uint32_t bti,
uint32_t rgba,
uint32_t msg_type,
}
static void setDPByteScatterGather(GenEncoder *p,
- GenInstruction *insn,
+ GenNativeInstruction *insn,
uint32_t bti,
uint32_t elem_size,
uint32_t msg_type,
}
#if 0
static void setOBlockRW(GenEncoder *p,
- GenInstruction *insn,
+ GenNativeInstruction *insn,
uint32_t bti,
uint32_t size,
uint32_t msg_type,
#endif
static void setSamplerMessage(GenEncoder *p,
- GenInstruction *insn,
+ GenNativeInstruction *insn,
unsigned char bti,
unsigned char sampler,
uint32_t msg_type,
static void setTypedWriteMessage(GenEncoder *p,
- GenInstruction *insn,
+ GenNativeInstruction *insn,
unsigned char bti,
unsigned char msg_type,
uint32_t msg_length,
insn->bits3.gen7_typed_rw.msg_type = msg_type;
}
static void setDWordScatterMessgae(GenEncoder *p,
- GenInstruction *insn,
+ GenNativeInstruction *insn,
uint32_t bti,
uint32_t block_size,
uint32_t msg_type,
curr = stack[--stateNum];
}
- void GenEncoder::setHeader(GenInstruction *insn) {
+ void GenEncoder::setHeader(GenNativeInstruction *insn) {
if (this->curr.execWidth == 8)
insn->header.execution_size = GEN_WIDTH_8;
else if (this->curr.execWidth == 16)
insn->header.saturate = this->curr.saturate;
}
- void GenEncoder::setDst(GenInstruction *insn, GenRegister dest) {
+ void GenEncoder::setDst(GenNativeInstruction *insn, GenRegister dest) {
if (dest.file != GEN_ARCHITECTURE_REGISTER_FILE)
assert(dest.nr < 128);
insn->bits1.da1.dest_horiz_stride = dest.hstride;
}
- void GenEncoder::setSrc0(GenInstruction *insn, GenRegister reg) {
+ void GenEncoder::setSrc0(GenNativeInstruction *insn, GenRegister reg) {
if (reg.file != GEN_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
}
}
- void GenEncoder::setSrc1(GenInstruction *insn, GenRegister reg) {
+ void GenEncoder::setSrc1(GenNativeInstruction *insn, GenRegister reg) {
assert(reg.nr < 128);
assert(reg.file != GEN_ARCHITECTURE_REGISTER_FILE || reg.nr == 0);
}
void GenEncoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) {
- GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
assert(elemNum >= 1 || elemNum <= 4);
uint32_t msg_length = 0;
uint32_t response_length = 0;
}
void GenEncoder::UNTYPED_WRITE(GenRegister msg, uint32_t bti, uint32_t elemNum) {
- GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
assert(elemNum >= 1 || elemNum <= 4);
uint32_t msg_length = 0;
uint32_t response_length = 0;
}
void GenEncoder::BYTE_GATHER(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemSize) {
- GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
uint32_t msg_length = 0;
uint32_t response_length = 0;
if (this->curr.execWidth == 8) {
}
void GenEncoder::BYTE_SCATTER(GenRegister msg, uint32_t bti, uint32_t elemSize) {
- GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
uint32_t msg_length = 0;
uint32_t response_length = 0;
this->setHeader(insn);
}
void GenEncoder::DWORD_GATHER(GenRegister dst, GenRegister src, uint32_t bti) {
- GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
uint32_t msg_length = 0;
uint32_t response_length = 0;
uint32_t block_size = 0;
}
void GenEncoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum) {
- GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
uint32_t msg_length = 0;
uint32_t response_length = 0;
NOT_SUPPORTED;
}
+  // Append a zero-initialized compact instruction carrying `opcode` to the
+  // instruction store and hand back a pointer to the stored copy.
+  // NOTE(review): the pointer aliases store.back(); presumably it is only valid
+  // until the store grows again - confirm against the store's container type.
+  GenCompactInstruction *GenEncoder::nextCompact(uint32_t opcode) {
+    GenCompactInstruction compact;
+    std::memset(&compact, 0, sizeof(compact));
+    compact.bits1.opcode = opcode;
+    // A compact instruction fills exactly one store element (its `low` part)
+    this->store.push_back(compact.low);
+    return (GenCompactInstruction *)&this->store.back();
+  }
- GenInstruction *GenEncoder::next(uint32_t opcode) {
- GenInstruction insn;
- std::memset(&insn, 0, sizeof(GenInstruction));
+ // Append a zero-initialized native instruction carrying `opcode` to the
+ // instruction store and return a pointer to the stored copy.
+ GenNativeInstruction *GenEncoder::next(uint32_t opcode) {
+ GenNativeInstruction insn;
+ std::memset(&insn, 0, sizeof(GenNativeInstruction));
insn.header.opcode = opcode;
- this->store.push_back(insn);
- return &this->store.back();
+ // A native instruction takes two consecutive store elements: low, then high
+ this->store.push_back(insn.low);
+ this->store.push_back(insn.high);
+ // back() is the high half, so the instruction starts one element earlier
+ return (GenNativeInstruction *)(&this->store.back()-1);
}
INLINE void _handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst,
int w = p->curr.execWidth;
p->push();
p->curr.nibControl = 0;
- GenInstruction *insn = p->next(opcode);
+ GenNativeInstruction *insn = p->next(opcode);
p->setHeader(insn);
p->setDst(insn, dst);
p->setSrc0(insn, src0);
}
p->pop();
} else if (needToSplitAlu1(p, dst, src) == false) {
- GenInstruction *insn = p->next(opcode);
+ if(compactAlu1(p, opcode, dst, src, condition, false))
+ return;
+ GenNativeInstruction *insn = p->next(opcode);
if (condition != 0) {
GBE_ASSERT(opcode == GEN_OPCODE_MOV ||
opcode == GEN_OPCODE_NOT);
p->setDst(insn, dst);
p->setSrc0(insn, src);
} else {
- GenInstruction *insnQ1, *insnQ2;
+ GenNativeInstruction *insnQ1, *insnQ2;
// Instruction for the first quarter
insnQ1 = p->next(opcode);
if (dst.isdf() && src0.isdf() && src1.isdf()) {
handleDouble(p, opcode, dst, src0, src1);
} else if (needToSplitAlu2(p, dst, src0, src1) == false) {
- GenInstruction *insn = p->next(opcode);
+ if(compactAlu2(p, opcode, dst, src0, src1, condition, false))
+ return;
+ GenNativeInstruction *insn = p->next(opcode);
if (condition != 0) {
GBE_ASSERT(opcode == GEN_OPCODE_OR ||
opcode == GEN_OPCODE_XOR ||
p->setSrc0(insn, src0);
p->setSrc1(insn, src1);
} else {
- GenInstruction *insnQ1, *insnQ2;
+ GenNativeInstruction *insnQ1, *insnQ2;
// Instruction for the first quarter
insnQ1 = p->next(opcode);
#define NO_SWIZZLE ((0<<0) | (1<<2) | (2<<4) | (3<<6))
- static GenInstruction *alu3(GenEncoder *p,
+ static GenNativeInstruction *alu3(GenEncoder *p,
uint32_t opcode,
GenRegister dest,
GenRegister src0,
GenRegister src1,
GenRegister src2)
{
- GenInstruction *insn = p->next(opcode);
+ GenNativeInstruction *insn = p->next(opcode);
assert(dest.file == GEN_GENERAL_REGISTER_FILE);
assert(dest.nr < 128);
// Emit second half of the instruction
if (p->curr.execWidth == 16) {
- GenInstruction q1Insn = *insn;
+ GenNativeInstruction q1Insn = *insn;
insn = p->next(opcode);
*insn = q1Insn;
insn->header.quarter_control = GEN_COMPRESSION_Q2;
void GenEncoder::NOP(void) {
- GenInstruction *insn = this->next(GEN_OPCODE_NOP);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_NOP);
this->setDst(insn, GenRegister::retype(GenRegister::f4grf(0,0), GEN_TYPE_UD));
this->setSrc0(insn, GenRegister::retype(GenRegister::f4grf(0,0), GEN_TYPE_UD));
this->setSrc1(insn, GenRegister::immud(0x0));
}
void GenEncoder::BARRIER(GenRegister src) {
- GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
this->setHeader(insn);
this->setDst(insn, GenRegister::null());
this->setSrc0(insn, src);
insn->bits3.msg_gateway.notify = 0x1;
}
void GenEncoder::FENCE(GenRegister dst) {
- GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
this->setHeader(insn);
this->setDst(insn, dst);
this->setSrc0(insn, dst);
ALU2_BRA(BRC)
void GenEncoder::patchJMPI(uint32_t insnID, int32_t jumpDistance) {
- GenInstruction &insn = this->store[insnID];
+ GenNativeInstruction &insn = *(GenNativeInstruction *)&this->store[insnID];
GBE_ASSERT(insnID < this->store.size());
GBE_ASSERT(insn.header.opcode == GEN_OPCODE_JMPI ||
insn.header.opcode == GEN_OPCODE_BRD ||
// for all the branching instruction. And need to adjust the distance
// for those branch instruction's start point and end point contains
// this instruction.
- GenInstruction &insn2 = this->store[insnID+1];
+ GenNativeInstruction &insn2 = *(GenNativeInstruction *)&this->store[insnID+2];
GBE_ASSERT(insn2.header.opcode == GEN_OPCODE_NOP);
insn.header.opcode = GEN_OPCODE_ADD;
this->setDst(&insn, GenRegister::ip());
} else {
insn.header.predicate_inverse ^= 1;
this->setSrc1(&insn, GenRegister::immd(2));
- GenInstruction &insn2 = this->store[insnID+1];
+ GenNativeInstruction &insn2 = *(GenNativeInstruction *)&this->store[insnID+2];
GBE_ASSERT(insn2.header.opcode == GEN_OPCODE_NOP);
GBE_ASSERT(insnID < this->store.size());
insn2.header.predicate_control = GEN_PREDICATE_NONE;
void GenEncoder::CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst) {
if (needToSplitCmp(this, src0, src1) == false) {
- GenInstruction *insn = this->next(GEN_OPCODE_CMP);
+ if(compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false)) {
+ return;
+ }
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_CMP);
this->setHeader(insn);
insn->header.destreg_or_condmod = conditional;
insn->header.thread_control = GEN_THREAD_SWITCH;
this->setSrc0(insn, src0);
this->setSrc1(insn, src1);
} else {
- GenInstruction *insnQ1, *insnQ2;
+ GenNativeInstruction *insnQ1, *insnQ2;
// Instruction for the first quarter
insnQ1 = this->next(GEN_OPCODE_CMP);
GenRegister src0,
GenRegister src1)
{
- GenInstruction *insn = this->next(GEN_OPCODE_SEL);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEL);
GBE_ASSERT(curr.predicate == GEN_PREDICATE_NONE);
this->setHeader(insn);
insn->header.destreg_or_condmod = conditional;
}
void GenEncoder::WAIT(void) {
- GenInstruction *insn = this->next(GEN_OPCODE_WAIT);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_WAIT);
GenRegister src = GenRegister::notification1();
this->setDst(insn, GenRegister::null());
this->setSrc0(insn, src);
}
void GenEncoder::MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1) {
- GenInstruction *insn = this->next(GEN_OPCODE_MATH);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_MATH);
assert(dst.file == GEN_GENERAL_REGISTER_FILE);
assert(src0.file == GEN_GENERAL_REGISTER_FILE);
assert(src1.file == GEN_GENERAL_REGISTER_FILE);
insn->header.quarter_control = GEN_COMPRESSION_Q1;
if(this->curr.execWidth == 16) {
- GenInstruction *insn2 = this->next(GEN_OPCODE_MATH);
+ GenNativeInstruction *insn2 = this->next(GEN_OPCODE_MATH);
GenRegister new_dest, new_src0, new_src1;
new_dest = GenRegister::QnPhysical(dst, 1);
new_src0 = GenRegister::QnPhysical(src0, 1);
}
void GenEncoder::MATH(GenRegister dst, uint32_t function, GenRegister src) {
- GenInstruction *insn = this->next(GEN_OPCODE_MATH);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_MATH);
assert(dst.file == GEN_GENERAL_REGISTER_FILE);
assert(src.file == GEN_GENERAL_REGISTER_FILE);
assert(dst.hstride == GEN_HORIZONTAL_STRIDE_1);
msg_length++;
uint32_t simd_mode = (simdWidth == 16) ?
GEN_SAMPLER_SIMD_MODE_SIMD16 : GEN_SAMPLER_SIMD_MODE_SIMD8;
- GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
this->setHeader(insn);
this->setDst(insn, dest);
this->setSrc0(insn, msg);
void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present, unsigned char bti)
{
- GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
uint32_t msg_type = GEN_TYPED_WRITE;
uint32_t msg_length = header_present ? 9 : 8;
this->setHeader(insn);
setTypedWriteMessage(this, insn, bti, msg_type, msg_length, header_present);
}
static void setScratchMessage(GenEncoder *p,
- GenInstruction *insn,
+ GenNativeInstruction *insn,
uint32_t offset,
uint32_t block_size,
uint32_t channel_mode,
{
assert(src_num == 1 || src_num ==2);
uint32_t block_size = src_num == 1 ? GEN_SCRATCH_BLOCK_SIZE_1 : GEN_SCRATCH_BLOCK_SIZE_2;
- GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
this->setHeader(insn);
this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
this->setSrc0(insn, msg);
{
assert(dst_num == 1 || dst_num ==2);
uint32_t block_size = dst_num == 1 ? GEN_SCRATCH_BLOCK_SIZE_1 : GEN_SCRATCH_BLOCK_SIZE_2;
- GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
this->setHeader(insn);
this->setDst(insn, dst);
this->setSrc0(insn, src);
}
void GenEncoder::EOT(uint32_t msg) {
- GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
this->setSrc0(insn, GenRegister::ud8grf(msg,0));
this->setSrc1(insn, GenRegister::immud(0));
////////////////////////////////////////////////////////////////////////
// Helper functions to encode
////////////////////////////////////////////////////////////////////////
- void setHeader(GenInstruction *insn);
- void setDst(GenInstruction *insn, GenRegister dest);
- void setSrc0(GenInstruction *insn, GenRegister reg);
- void setSrc1(GenInstruction *insn, GenRegister reg);
- GenInstruction *next(uint32_t opcode);
+ void setHeader(GenNativeInstruction *insn);
+ void setDst(GenNativeInstruction *insn, GenRegister dest);
+ void setSrc0(GenNativeInstruction *insn, GenRegister reg);
+ void setSrc1(GenNativeInstruction *insn, GenRegister reg);
+ GenCompactInstruction *nextCompact(uint32_t opcode);
+ GenNativeInstruction *next(uint32_t opcode);
uint32_t n_instruction(void) const { return store.size(); }
GBE_CLASS(GenEncoder); //!< Use custom allocators
};
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Ruiling Song <ruiling.song@intel.com>
+ */
+#include "backend/gen_defs.hpp"
+#include "backend/gen_encoder.hpp"
+#include <cstring>
+
+namespace gbe {
+
+ // One row of a compaction lookup table: a field bit pattern and the index
+ // that stands for it in the compact encoding.
+ struct compact_table_entry {
+ // Packed field bits; this is the key compared by cmp_key()
+ uint32_t bit_pattern;
+ // Value stored in the compact instruction's *_index field
+ uint32_t index;
+ };
+
+ // Control-bits compaction table. Each bit_pattern is laid out like
+ // ControlBits::data. compactControlBits() binary-searches this array, so the
+ // rows must stay sorted by ascending bit_pattern; decompactInstruction()
+ // indexes it directly with the compact control_index, so row i must keep
+ // index i.
+ static compact_table_entry control_table[] = {
+ {0b0000000000000000010, 0},
+ {0b0000100000000000000, 1},
+ {0b0000100000000000001, 2},
+ {0b0000100000000000010, 3},
+ {0b0000100000000000011, 4},
+ {0b0000100000000000100, 5},
+ {0b0000100000000000101, 6},
+ {0b0000100000000000111, 7},
+ {0b0000100000000001000, 8},
+ {0b0000100000000001001, 9},
+ {0b0000100000000001101, 10},
+ {0b0000110000000000000, 11},
+ {0b0000110000000000001, 12},
+ {0b0000110000000000010, 13},
+ {0b0000110000000000011, 14},
+ {0b0000110000000000100, 15},
+ {0b0000110000000000101, 16},
+ {0b0000110000000000111, 17},
+ {0b0000110000000001001, 18},
+ {0b0000110000000001101, 19},
+ {0b0000110000000010000, 20},
+ {0b0000110000100000000, 21},
+ {0b0001000000000000000, 22},
+ {0b0001000000000000010, 23},
+ {0b0001000000000000100, 24},
+ {0b0001000000100000000, 25},
+ {0b0010110000000000000, 26},
+ {0b0010110000000010000, 27},
+ {0b0011000000000000000, 28},
+ {0b0011000000100000000, 29},
+ {0b0101000000000000000, 30},
+ {0b0101000000100000000, 31},
+ };
+
+ // Data-type compaction table used when encoding. Each bit_pattern is laid
+ // out like DataTypeBits::data. compactDataTypeBits() binary-searches this
+ // array, so the rows are sorted by ascending bit_pattern; as a result the
+ // index column is NOT in positional order and this array must not be
+ // indexed by data_type_index (data_type_decompact serves that purpose).
+ static compact_table_entry data_type_table[] = {
+ {0b000000001000001100, 20},
+ {0b001000000000000001, 0},
+ {0b001000000000100000, 1},
+ {0b001000000000100001, 2},
+ {0b001000000000111101, 21},
+ {0b001000000001100001, 3},
+ {0b001000000010100101, 22},
+ {0b001000000010111101, 4},
+ {0b001000001011111101, 5},
+ {0b001000001110100001, 6},
+ {0b001000001110100101, 7},
+ {0b001000001110111101, 8},
+ {0b001000010000100000, 23},
+ {0b001000010000100001, 9},
+ {0b001000110000100000, 10},
+ {0b001000110000100001, 11},
+ {0b001001010010100100, 24},
+ {0b001001010010100101, 12},
+ {0b001001110010000100, 25},
+ {0b001001110010100100, 13},
+ {0b001001110010100101, 14},
+ {0b001010010100001001, 26},
+ {0b001010010100101000, 30},
+ {0b001010110100101000, 31},
+ {0b001011110110101100, 29},
+ {0b001101111110111101, 27},
+ {0b001111001110111101, 15},
+ {0b001111011110011101, 16},
+ {0b001111011110111100, 17},
+ {0b001111011110111101, 18},
+ {0b001111111110111100, 19},
+ {0b001111111110111101, 28},
+ };
+
+ // Data-type table used when decoding: the same (bit_pattern, index) pairs
+ // as data_type_table, but ordered by index so that decompactInstruction()
+ // can index it directly with the compact data_type_index. Row i must keep
+ // index i, and the two tables must be kept in sync.
+ static compact_table_entry data_type_decompact[] = {
+ {0b001000000000000001, 0},
+ {0b001000000000100000, 1},
+ {0b001000000000100001, 2},
+ {0b001000000001100001, 3},
+ {0b001000000010111101, 4},
+ {0b001000001011111101, 5},
+ {0b001000001110100001, 6},
+ {0b001000001110100101, 7},
+ {0b001000001110111101, 8},
+ {0b001000010000100001, 9},
+ {0b001000110000100000, 10},
+ {0b001000110000100001, 11},
+ {0b001001010010100101, 12},
+ {0b001001110010100100, 13},
+ {0b001001110010100101, 14},
+ {0b001111001110111101, 15},
+ {0b001111011110011101, 16},
+ {0b001111011110111100, 17},
+ {0b001111011110111101, 18},
+ {0b001111111110111100, 19},
+ {0b000000001000001100, 20},
+ {0b001000000000111101, 21},
+ {0b001000000010100101, 22},
+ {0b001000010000100000, 23},
+ {0b001001010010100100, 24},
+ {0b001001110010000100, 25},
+ {0b001010010100001001, 26},
+ {0b001101111110111101, 27},
+ {0b001111111110111101, 28},
+ {0b001011110110101100, 29},
+ {0b001010010100101000, 30},
+ {0b001010110100101000, 31},
+ };
+
+ // Sub-register compaction table. Each bit_pattern is laid out like
+ // SubRegBits::data (dest, src0 and src1 sub-register numbers, 5 bits each).
+ // compactSubRegBits() binary-searches this array and decompactInstruction()
+ // indexes it directly with sub_reg_index, so the rows must stay sorted by
+ // ascending bit_pattern and row i must keep index i.
+ static compact_table_entry subreg_table[] = {
+ {0b000000000000000, 0},
+ {0b000000000000001, 1},
+ {0b000000000001000, 2},
+ {0b000000000001111, 3},
+ {0b000000000010000, 4},
+ {0b000000010000000, 5},
+ {0b000000100000000, 6},
+ {0b000000110000000, 7},
+ {0b000001000000000, 8},
+ {0b000001000010000, 9},
+ {0b000001010000000, 10},
+ {0b001000000000000, 11},
+ {0b001000000000001, 12},
+ {0b001000010000001, 13},
+ {0b001000010000010, 14},
+ {0b001000010000011, 15},
+ {0b001000010000100, 16},
+ {0b001000010000111, 17},
+ {0b001000010001000, 18},
+ {0b001000010001110, 19},
+ {0b001000010001111, 20},
+ {0b001000110000000, 21},
+ {0b001000111101000, 22},
+ {0b010000000000000, 23},
+ {0b010000110000000, 24},
+ {0b011000000000000, 25},
+ {0b011110010000111, 26},
+ {0b100000000000000, 27},
+ {0b101000000000000, 28},
+ {0b110000000000000, 29},
+ {0b111000000000000, 30},
+ {0b111000000011100, 31},
+ };
+
+ // Source-register compaction table, shared by src0 and src1. Each
+ // bit_pattern is laid out like SrcRegBits::data. compactSrcRegBits()
+ // binary-searches this array and decompactInstruction() indexes it directly
+ // with the src0/src1 index, so the rows must stay sorted by ascending
+ // bit_pattern and row i must keep index i.
+ static compact_table_entry srcreg_table[] = {
+ {0b000000000000, 0},
+ {0b000000000010, 1},
+ {0b000000010000, 2},
+ {0b000000010010, 3},
+ {0b000000011000, 4},
+ {0b000000100000, 5},
+ {0b000000101000, 6},
+ {0b000001001000, 7},
+ {0b000001010000, 8},
+ {0b000001110000, 9},
+ {0b000001111000, 10},
+ {0b001100000000, 11},
+ {0b001100000010, 12},
+ {0b001100001000, 13},
+ {0b001100010000, 14},
+ {0b001100010010, 15},
+ {0b001100100000, 16},
+ {0b001100101000, 17},
+ {0b001100111000, 18},
+ {0b001101000000, 19},
+ {0b001101000010, 20},
+ {0b001101001000, 21},
+ {0b001101010000, 22},
+ {0b001101100000, 23},
+ {0b001101101000, 24},
+ {0b001101110000, 25},
+ {0b001101110001, 26},
+ {0b001101111000, 27},
+ {0b010001101000, 28},
+ {0b010001101001, 29},
+ {0b010001101010, 30},
+ {0b010110001000, 31},
+ };
+
+  // bsearch() comparator ordering two compact_table_entry rows by bit_pattern.
+  // p1 / p2 point to compact_table_entry objects (the search key and a table
+  // row). Returns a negative value, zero or a positive value when the first
+  // pattern is respectively smaller than, equal to or greater than the second.
+  static int cmp_key(const void *p1, const void *p2) {
+    const compact_table_entry *px = static_cast<const compact_table_entry *>(p1);
+    const compact_table_entry *py = static_cast<const compact_table_entry *>(p2);
+    // Compare explicitly instead of subtracting: the difference of two
+    // uint32_t values does not fit in an int once it reaches 2^31, which would
+    // flip the sign and misdirect the binary search.
+    if (px->bit_pattern < py->bit_pattern)
+      return -1;
+    if (px->bit_pattern > py->bit_pattern)
+      return 1;
+    return 0;
+  }
+  // Bit-field view of one control_table bit pattern. compactControlBits()
+  // fills the fields and uses `data` as the search key; decompactInstruction()
+  // loads `data` from the table and reads the fields back.
+  // The fields must add up to exactly 32 bits so that the whole struct
+  // overlays `data` and the union stays 4 bytes wide.
+  union ControlBits{
+    struct {
+      uint32_t access_mode:1;        // left at 0 by compactControlBits()
+      uint32_t mask_control:1;       // from the encoder state's noMask
+      uint32_t dependency_control:2; // left at 0 by compactControlBits()
+      uint32_t quarter_control:2;    // from the `quarter` argument
+      uint32_t thread_control:2;     // left at 0 by compactControlBits()
+      uint32_t predicate_control:4;  // from the encoder state's predicate
+      uint32_t predicate_inverse:1;  // from the encoder state's inversePredicate
+      uint32_t execution_size:3;     // GEN_WIDTH_* matching the execution width
+      uint32_t saturate:1;
+      uint32_t flag_sub_reg_nr:1;
+      uint32_t flag_reg_nr:1;
+      uint32_t pad:13;               // 19 field bits + 13 = 32
+    };
+    uint32_t data;
+  };
+ // Bit-field view of one data-type table bit pattern (18 used bits + 14 pad
+ // = 32). compactDataTypeBits() fills the fields and searches for `data`;
+ // decompactInstruction() copies the low 15 bits (register files and types)
+ // in one go and transfers dest_horiz_stride / dest_address_mode separately.
+ union DataTypeBits{
+ struct {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ // src1 file/type are left at 0 for single-source instructions
+ uint32_t src1_reg_file:2;
+ uint32_t src1_reg_type:3;
+ uint32_t dest_horiz_stride:2;
+ uint32_t dest_address_mode:1;
+ uint32_t pad:14;
+ };
+ uint32_t data;
+ };
+ // Bit-field view of one subreg_table bit pattern: the three sub-register
+ // numbers, 5 bits each, padded to 32 bits.
+ union SubRegBits {
+ struct {
+ uint32_t dest_subreg_nr:5;
+ uint32_t src0_subreg_nr:5;
+ // left at 0 for single-source instructions
+ uint32_t src1_subreg_nr:5;
+ uint32_t pad:17;
+ };
+ uint32_t data;
+ };
+ // Bit-field view of one srcreg_table bit pattern (12 used bits + 20 pad =
+ // 32): source modifiers, addressing mode and region description of a
+ // source operand. Used for both src0 and src1.
+ union SrcRegBits {
+ struct {
+ uint32_t src_abs:1;
+ uint32_t src_negate:1;
+ uint32_t src_address_mode:1;
+ uint32_t src_horiz_stride:2;
+ uint32_t src_width:3;
+ uint32_t src_vert_stride:4;
+ uint32_t pad:20;
+ };
+ uint32_t data;
+ };
+
+  // Expand the compact instruction `p` into its native form in `pOut`.
+  // `pOut` is cleared first; each table index stored in `p` is then turned
+  // back into its bit pattern and scattered into the native fields.
+  void decompactInstruction(GenCompactInstruction * p, GenNativeInstruction *pOut) {
+
+    memset(pOut, 0, sizeof(GenNativeInstruction));
+
+    // Header: opcode plus the low 16 control bits, which start at bit 8
+    union ControlBits ctrl;
+    ctrl.data = control_table[(uint32_t)p->bits1.control_index].bit_pattern;
+    pOut->low.low = (uint32_t)p->bits1.opcode | ((ctrl.data & 0xffff) << 8);
+    pOut->header.destreg_or_condmod = p->bits1.destreg_or_condmod;
+    pOut->header.saturate = ctrl.saturate;
+    pOut->header.acc_wr_control = p->bits1.acc_wr_control;
+    pOut->header.cmpt_control = p->bits1.cmpt_control;
+    pOut->header.debug_control = p->bits1.debug_control;
+
+    // Look up the remaining table-encoded fields
+    union DataTypeBits types;
+    union SubRegBits subregs;
+    union SrcRegBits src0Region;
+    types.data = data_type_decompact[(uint32_t)p->bits1.data_type_index].bit_pattern;
+    subregs.data = subreg_table[(uint32_t)p->bits1.sub_reg_index].bit_pattern;
+    // The src0 index is split: 2 low bits in bits1, the rest in bits2
+    src0Region.data = srcreg_table[p->bits1.src0_index_lo | p->bits2.src0_index_hi << 2].bit_pattern;
+
+    // Destination: register files/types are the low 15 data-type bits
+    pOut->low.high |= types.data & 0x7fff;
+    pOut->bits1.da1.dest_horiz_stride = types.dest_horiz_stride;
+    pOut->bits1.da1.dest_address_mode = types.dest_address_mode;
+    pOut->bits1.da1.dest_reg_nr = p->bits2.dest_reg_nr;
+    pOut->bits1.da1.dest_subreg_nr = subregs.dest_subreg_nr;
+
+    // Source 0 and the flag register selection
+    pOut->bits2.da1.src0_subreg_nr = subregs.src0_subreg_nr;
+    pOut->bits2.da1.src0_reg_nr = p->bits2.src0_reg_nr;
+    pOut->high.low |= (src0Region.data << 13);
+    pOut->bits2.da1.flag_sub_reg_nr = ctrl.flag_sub_reg_nr;
+    pOut->bits2.da1.flag_reg_nr = ctrl.flag_reg_nr;
+
+    if (data_type_bits_is_register:
+        types.src1_reg_file != GEN_IMMEDIATE_VALUE) {
+      // Source 1 is a register: decode it like source 0
+      union SrcRegBits src1Region;
+      src1Region.data = srcreg_table[p->bits2.src1_index].bit_pattern;
+      pOut->bits3.da1.src1_subreg_nr = subregs.src1_subreg_nr;
+      pOut->bits3.da1.src1_reg_nr = p->bits2.src1_reg_nr;
+      pOut->high.high |= (src1Region.data << 13);
+    } else {
+      // Source 1 is a 13-bit immediate: low 8 bits live in src1_reg_nr, the
+      // upper 5 in src1_index; bit 12 is the sign and gets extended
+      uint32_t imm = (uint32_t)p->bits2.src1_reg_nr | (p->bits2.src1_index<<8);
+      if (imm & 0x1000)
+        imm |= 0xfffff000;
+      pOut->bits3.ud = imm;
+    }
+  }
+
+  // Find the control_table index matching the encoder's current state for
+  // the given quarter control and execution width.
+  // Returns the table index, or -1 when the state has no compact encoding.
+  int compactControlBits(GenEncoder *p, uint32_t quarter, uint32_t execWidth) {
+    const GenInstructionState &state = p->curr;
+
+    // Cheap rejections before building the search key
+    if (state.nibControl != 0 ||
+        state.predicate > GEN_PREDICATE_NORMAL ||
+        state.flag == 1)
+      return -1;
+
+    ControlBits bits;
+    bits.data = 0;
+
+    switch (execWidth) {
+      case 8:
+        bits.execution_size = GEN_WIDTH_8;
+        break;
+      case 16:
+        bits.execution_size = GEN_WIDTH_16;
+        break;
+      case 1:
+        bits.execution_size = GEN_WIDTH_1;
+        break;
+      default:
+        NOT_IMPLEMENTED;
+        break;
+    }
+
+    bits.mask_control = state.noMask;
+    bits.quarter_control = quarter;
+    bits.predicate_control = state.predicate;
+    bits.predicate_inverse = state.inversePredicate;
+
+    bits.saturate = state.saturate;
+    bits.flag_sub_reg_nr = state.subFlag;
+    bits.flag_reg_nr = state.flag;
+
+    // Binary search the (sorted) table for the assembled pattern
+    compact_table_entry wanted;
+    wanted.bit_pattern = bits.data;
+    const compact_table_entry *hit = (compact_table_entry *)bsearch(
+        &wanted, control_table,
+        sizeof(control_table)/sizeof(compact_table_entry),
+        sizeof(compact_table_entry), cmp_key);
+    return hit != NULL ? (int)hit->index : -1;
+  }
+
+  // Find the data_type_table index describing the register files and types of
+  // `dst`, `src0` and (optionally) `src1`, plus the destination stride and
+  // addressing mode. `src1` may be NULL for single-source instructions.
+  // Returns the table index, or -1 when the combination cannot be compacted.
+  int compactDataTypeBits(GenEncoder *p, GenRegister *dst, GenRegister *src0, GenRegister *src1) {
+    // Compact encoding does not support any indirect access
+    if (dst->address_mode != GEN_ADDRESS_DIRECT)
+      return -1;
+    // ... nor an immediate first source
+    if (src0->file == GEN_IMMEDIATE_VALUE)
+      return -1;
+
+    DataTypeBits bits;
+    bits.data = 0;
+
+    // A destination stride of 0 is encoded as stride 1
+    if (dst->hstride == GEN_HORIZONTAL_STRIDE_0)
+      bits.dest_horiz_stride = GEN_HORIZONTAL_STRIDE_1;
+    else
+      bits.dest_horiz_stride = dst->hstride;
+    bits.dest_address_mode = dst->address_mode;
+    bits.dest_reg_file = dst->file;
+    bits.dest_reg_type = dst->type;
+
+    bits.src0_reg_file = src0->file;
+    bits.src0_reg_type = src0->type;
+
+    // A missing second source is described by all-zero file/type bits
+    bits.src1_reg_type = src1 ? src1->type : 0;
+    bits.src1_reg_file = src1 ? src1->file : 0;
+
+    compact_table_entry wanted;
+    wanted.bit_pattern = bits.data;
+    const compact_table_entry *hit = (compact_table_entry *)bsearch(
+        &wanted, data_type_table,
+        sizeof(data_type_table)/sizeof(compact_table_entry),
+        sizeof(compact_table_entry), cmp_key);
+    return hit != NULL ? (int)hit->index : -1;
+  }
+  // Find the subreg_table index for the sub-register numbers of `dst`, `src0`
+  // and (optionally) `src1`; a NULL `src1` counts as sub-register 0.
+  // Returns the table index, or -1 when the combination is not in the table.
+  int compactSubRegBits(GenEncoder *p, GenRegister *dst, GenRegister *src0, GenRegister *src1) {
+    SubRegBits bits;
+    bits.data = 0;
+    bits.dest_subreg_nr = dst->subnr;
+    bits.src0_subreg_nr = src0->subnr;
+    bits.src1_subreg_nr = src1 ? src1->subnr : 0;
+
+    compact_table_entry wanted;
+    wanted.bit_pattern = bits.data;
+    const compact_table_entry *hit = (compact_table_entry *)bsearch(
+        &wanted, subreg_table,
+        sizeof(subreg_table)/sizeof(compact_table_entry),
+        sizeof(compact_table_entry), cmp_key);
+    return hit != NULL ? (int)hit->index : -1;
+  }
+  // Find the srcreg_table index for the modifiers and region of `src`.
+  // Returns the table index, or -1 when the source is an immediate, is
+  // accessed indirectly, or uses a region that is not in the table.
+  int compactSrcRegBits(GenEncoder *p, GenRegister *src) {
+    // As we only use GEN_ALIGN_1 and compact only supports direct register
+    // access, we only need to verify [hstride, width, vstride]
+    if (src->file == GEN_IMMEDIATE_VALUE || src->address_mode != GEN_ADDRESS_DIRECT)
+      return -1;
+
+    SrcRegBits bits;
+    bits.data = 0;
+    bits.src_abs = src->absolute;
+    bits.src_negate = src->negation;
+    bits.src_address_mode = src->address_mode;
+    bits.src_width = src->width;
+
+    // A scalar read under execution width 1 is always encoded with zero
+    // strides, whatever strides the register itself carries
+    const bool scalar = p->curr.execWidth == 1 && src->width == GEN_WIDTH_1;
+    if (scalar) {
+      bits.src_horiz_stride = GEN_HORIZONTAL_STRIDE_0;
+      bits.src_vert_stride = GEN_VERTICAL_STRIDE_0;
+    } else {
+      bits.src_horiz_stride = src->hstride;
+      bits.src_vert_stride = src->vstride;
+    }
+
+    compact_table_entry wanted;
+    wanted.bit_pattern = bits.data;
+    const compact_table_entry *hit = (compact_table_entry *)bsearch(
+        &wanted, srcreg_table,
+        sizeof(srcreg_table)/sizeof(compact_table_entry),
+        sizeof(compact_table_entry), cmp_key);
+    return hit != NULL ? (int)hit->index : -1;
+  }
+
+  // Try to emit a single-source ALU instruction in compact form.
+  // Returns true when a compact instruction was appended to the encoder, and
+  // false (nothing emitted) when the instruction cannot be compacted, in
+  // which case the caller has to emit the native form itself.
+  bool compactAlu1(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src, uint32_t condition, bool split) {
+    // TODO support it
+    if (split)
+      return false;
+
+    // Every table lookup must succeed before anything is emitted
+    const int ctrlIdx = compactControlBits(p, p->curr.quarterControl, p->curr.execWidth);
+    if (ctrlIdx == -1)
+      return false;
+
+    const int typeIdx = compactDataTypeBits(p, &dst, &src, NULL);
+    if (typeIdx == -1)
+      return false;
+
+    const int subRegIdx = compactSubRegBits(p, &dst, &src, NULL);
+    if (subRegIdx == -1)
+      return false;
+
+    const int srcIdx = compactSrcRegBits(p, &src);
+    if (srcIdx == -1)
+      return false;
+
+    GenCompactInstruction *compact = p->nextCompact(opcode);
+    compact->bits1.control_index = ctrlIdx;
+    compact->bits1.data_type_index = typeIdx;
+    compact->bits1.sub_reg_index = subRegIdx;
+    compact->bits1.acc_wr_control = p->curr.accWrEnable;
+    compact->bits1.destreg_or_condmod = condition;
+    compact->bits1.cmpt_control = 1;
+    // The source index is split: 2 low bits in bits1, the rest in bits2
+    compact->bits1.src0_index_lo = srcIdx & 3;
+
+    compact->bits2.src0_index_hi = srcIdx >> 2;
+    compact->bits2.src1_index = 0;
+    compact->bits2.dest_reg_nr = dst.nr;
+    compact->bits2.src0_reg_nr = src.nr;
+    compact->bits2.src1_reg_nr = 0;
+    return true;
+  }
+
+  // Try to emit a two-source ALU instruction in compact form. `src1` may be
+  // a register or a small integer immediate.
+  // Returns true when a compact instruction was appended to the encoder, and
+  // false (nothing emitted) when the instruction cannot be compacted, in
+  // which case the caller has to emit the native form itself.
+  bool compactAlu2(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1, uint32_t condition, bool split) {
+    // TODO support it
+    if (split)
+      return false;
+    // These opcodes are never compacted
+    if (opcode == GEN_OPCODE_IF || opcode == GEN_OPCODE_ENDIF || opcode == GEN_OPCODE_JMPI)
+      return false;
+
+    // Every table lookup must succeed before anything is emitted
+    const int ctrlIdx = compactControlBits(p, p->curr.quarterControl, p->curr.execWidth);
+    if (ctrlIdx == -1)
+      return false;
+
+    const int typeIdx = compactDataTypeBits(p, &dst, &src0, &src1);
+    if (typeIdx == -1)
+      return false;
+
+    const int subRegIdx = compactSubRegBits(p, &dst, &src0, &src1);
+    if (subRegIdx == -1)
+      return false;
+
+    const int src0Idx = compactSrcRegBits(p, &src0);
+    if (src0Idx == -1)
+      return false;
+
+    // Work out what goes into the src1_index / src1_reg_nr fields
+    uint32_t src1IndexField;
+    uint32_t src1NrField;
+    if (src1.file == GEN_IMMEDIATE_VALUE) {
+      const bool hasModifier = src1.absolute != 0 || src1.negation != 0;
+      if (hasModifier || src1.type == GEN_TYPE_F)
+        return false;
+      if (src1.value.d < -4096 || src1.value.d > 4095) // 13bit signed imm
+        return false;
+      // The immediate's upper 5 bits go to src1_index, its low 8 to src1_reg_nr
+      src1IndexField = (src1.value.ud & 8191) >> 8;
+      src1NrField = src1.value.ud & 0xff;
+    } else {
+      const int src1Idx = compactSrcRegBits(p, &src1);
+      if (src1Idx == -1)
+        return false;
+      src1IndexField = src1Idx;
+      src1NrField = src1.nr;
+    }
+
+    GenCompactInstruction *compact = p->nextCompact(opcode);
+    compact->bits1.control_index = ctrlIdx;
+    compact->bits1.data_type_index = typeIdx;
+    compact->bits1.sub_reg_index = subRegIdx;
+    compact->bits1.acc_wr_control = p->curr.accWrEnable;
+    compact->bits1.destreg_or_condmod = condition;
+    compact->bits1.cmpt_control = 1;
+    // The src0 index is split: 2 low bits in bits1, the rest in bits2
+    compact->bits1.src0_index_lo = src0Idx & 3;
+
+    compact->bits2.src0_index_hi = src0Idx >> 2;
+    compact->bits2.src1_index = src1IndexField;
+    compact->bits2.dest_reg_nr = dst.nr;
+    compact->bits2.src0_reg_nr = src0.nr;
+    compact->bits2.src1_reg_nr = src1NrField;
+    return true;
+  }
+};
sel.push();
sel.curr.noMask = 1;
sel.curr.predicate = GEN_PREDICATE_NONE;
- sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1);
+ sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
sel.pop();
if (sel.block->hasBarrier) {
cmp0->state.subFlag = insn.state.subFlag;
cmp0->src(0) = GenRegister::uw8grf(ir::Register(insn.state.flagIndex));
cmp0->src(1) = GenRegister::immuw(0);
- cmp0->dst(0) = GenRegister::null();
+ cmp0->dst(0) = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW);
cmp0->extra.function = GEN_CONDITIONAL_NEQ;
insn.prepend(*cmp0);
validatedFlags.insert(insn.state.flagIndex);
cmp0->state.subFlag = insn.state.subFlag;
cmp0->src(0) = GenRegister::uw8grf(ir::Register(insn.state.flagIndex));
cmp0->src(1) = GenRegister::immuw(0);
- cmp0->dst(0) = GenRegister::null();
+ cmp0->dst(0) = GenRegister::retype(GenRegister::null(), GEN_TYPE_UW);
cmp0->extra.function = GEN_CONDITIONAL_NEQ;
insn.prepend(*cmp0);
}
static INLINE GenRegister immuw(uint16_t uw) {
GenRegister immediate = imm(GEN_TYPE_UW);
- immediate.value.ud = uw | (uw << 16);
+ // Store the plain 16-bit value; it is no longer replicated into the upper
+ // 16 bits of the 32-bit immediate.
+ // NOTE(review): presumably so small values pass the 13-bit immediate range
+ // check in compactAlu2() - confirm.
+ immediate.value.ud = uw;
return immediate;
}
static INLINE GenRegister immw(int16_t w) {
GenRegister immediate = imm(GEN_TYPE_W);
- immediate.value.d = w | (w << 16);
+ // Store the plain 16-bit value; it is no longer replicated into the upper
+ // 16 bits of the 32-bit immediate.
+ // NOTE(review): presumably so small values pass the 13-bit immediate range
+ // check in compactAlu2() - confirm.
+ immediate.value.d = w;
return immediate;
}