From a7911620f61a10f6297155d58445ed146bbcb056 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Sat, 24 Jan 2015 01:12:04 +0800 Subject: [PATCH] ilo: update the toy compiler for Gen8 Based on what we know from the classic driver. --- src/gallium/drivers/ilo/shader/toy_compiler.h | 3 + src/gallium/drivers/ilo/shader/toy_compiler_asm.c | 245 ++++++++++++---- .../drivers/ilo/shader/toy_compiler_disasm.c | 324 ++++++++++++++++++--- src/gallium/drivers/ilo/shader/toy_compiler_reg.h | 8 +- src/gallium/drivers/ilo/shader/toy_helpers.h | 12 +- 5 files changed, 501 insertions(+), 91 deletions(-) diff --git a/src/gallium/drivers/ilo/shader/toy_compiler.h b/src/gallium/drivers/ilo/shader/toy_compiler.h index 190a9f4..9397c20 100644 --- a/src/gallium/drivers/ilo/shader/toy_compiler.h +++ b/src/gallium/drivers/ilo/shader/toy_compiler.h @@ -141,6 +141,9 @@ struct toy_compaction_table { uint32_t datatype[32]; uint32_t subreg[32]; uint32_t src[32]; + + uint32_t control_3src[4]; + uint64_t source_3src[4]; }; /** diff --git a/src/gallium/drivers/ilo/shader/toy_compiler_asm.c b/src/gallium/drivers/ilo/shader/toy_compiler_asm.c index 95ae792..0977d60 100644 --- a/src/gallium/drivers/ilo/shader/toy_compiler_asm.c +++ b/src/gallium/drivers/ilo/shader/toy_compiler_asm.c @@ -31,9 +31,11 @@ #define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT) struct codegen { + const struct ilo_dev_info *dev; const struct toy_inst *inst; int pc; + unsigned flag_reg_num; unsigned flag_sub_reg_num; struct codegen_dst { @@ -384,11 +386,36 @@ static const struct toy_compaction_table toy_compaction_table_gen7 = { }, }; +static const struct toy_compaction_table toy_compaction_table_gen8 = { + .control = { + }, + .datatype = { + }, + .subreg = { + }, + .src = { + }, + .control_3src = { + }, + .source_3src = { + }, +}; + const struct toy_compaction_table * toy_compiler_get_compaction_table(const struct ilo_dev_info *dev) { - return (ilo_dev_gen(dev) >= ILO_GEN(7)) ? - &toy_compaction_table_gen7 : &toy_compaction_table_gen6; + switch (ilo_dev_gen(dev)) { + case ILO_GEN(8): + return &toy_compaction_table_gen8; + case ILO_GEN(7.5): + case ILO_GEN(7): + return &toy_compaction_table_gen7; + case ILO_GEN(6): + return &toy_compaction_table_gen6; + default: + assert(!"unsupported gen"); + return NULL; + } } /** @@ -407,21 +434,29 @@ src_is_null(const struct codegen *cg, int idx) * Translate a source operand to DW2 or DW3 of the 1-src/2-src format. */ static uint32_t -translate_src(const struct codegen *cg, int idx) +translate_src_gen6(const struct codegen *cg, int idx) { const struct codegen_src *src = &cg->src[idx]; uint32_t dw; + ILO_DEV_ASSERT(cg->dev, 6, 8); + /* special treatment may be needed if any of the operand is immediate */ if (cg->src[0].file == GEN6_FILE_IMM) { assert(!cg->src[0].absolute && !cg->src[0].negate); /* only the last src operand can be an immediate */ assert(src_is_null(cg, 1)); - if (idx == 0) - return cg->flag_sub_reg_num << 25; - else + if (idx == 0) { + if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { + return cg->src[1].type << 27 | + cg->src[1].file << 25; + } else { + return cg->flag_sub_reg_num << 25; + } + } else { return cg->src[0].origin; + } } else if (idx && cg->src[1].file == GEN6_FILE_IMM) { assert(!cg->src[1].absolute && !cg->src[1].negate); @@ -449,10 +484,15 @@ translate_src(const struct codegen *cg, int idx) GEN6_ADDRMODE_INDIRECT << 15 | src->negate << 14 | src->absolute << 13 | - src->indirect_subreg << 10 | - (src->origin & 0x3f0) | src->swizzle[1] << 2 | src->swizzle[0]; + if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { + dw |= src->indirect_subreg << 9 | + (src->origin & 0x1f0); + } else { + dw |= src->indirect_subreg << 10 | + (src->origin & 0x3f0); + } } else { assert(src->swizzle[0] == TOY_SWIZZLE_X && @@ -465,9 +505,14 @@ translate_src(const struct codegen *cg, int idx) src->horz_stride << 16 | GEN6_ADDRMODE_INDIRECT << 15 | src->negate << 14 | - src->absolute << 13 | - src->indirect_subreg << 10 | - (src->origin & 0x3ff); + src->absolute << 13; + if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { + dw |= src->indirect_subreg << 9 | + (src->origin & 0x1ff); + } else { + dw |= src->indirect_subreg << 10 | + (src->origin & 0x3ff); + } } } else { @@ -521,8 +566,21 @@ translate_src(const struct codegen *cg, int idx) } } - if (idx == 0) - dw |= cg->flag_sub_reg_num << 25; + if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { + const bool indirect_origin_bit9 = (cg->dst.indirect) ? + (src->origin & 0x200) : 0; + + if (idx == 0) { + dw |= indirect_origin_bit9 << 31 | + cg->src[1].type << 27 | + cg->src[1].file << 25; + } else { + dw |= indirect_origin_bit9 << 25; + } + } else { + if (idx == 0) + dw |= cg->flag_sub_reg_num << 25; + } return dw; } @@ -532,11 +590,13 @@ translate_src(const struct codegen *cg, int idx) * 1-src/2-src format. */ static uint16_t -translate_dst_region(const struct codegen *cg) +translate_dst_region_gen6(const struct codegen *cg) { const struct codegen_dst *dst = &cg->dst; uint16_t dw1_region; + ILO_DEV_ASSERT(cg->dev, 6, 8); + if (dst->file == GEN6_FILE_IMM) { /* dst is immediate (JIP) when the opcode is a conditional branch */ switch (cg->inst->opcode) { @@ -575,17 +635,27 @@ translate_dst_region(const struct codegen *cg) dw1_region = GEN6_ADDRMODE_INDIRECT << 15 | dst->horz_stride << 13 | - dst->indirect_subreg << 10 | - (dst->origin & 0x3f0) | dst->writemask; + if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { + dw1_region |= dst->indirect_subreg << 9 | + (dst->origin & 0x1f0); + } else { + dw1_region |= dst->indirect_subreg << 10 | + (dst->origin & 0x3f0); + } } else { assert(dst->writemask == TOY_WRITEMASK_XYZW); dw1_region = GEN6_ADDRMODE_INDIRECT << 15 | - dst->horz_stride << 13 | - dst->indirect_subreg << 10 | - (dst->origin & 0x3ff); + dst->horz_stride << 13; + if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { + dw1_region |= dst->indirect_subreg << 9 | + (dst->origin & 0x1ff); + } else { + dw1_region |= dst->indirect_subreg << 10 | + (dst->origin & 0x3ff); + } } } else { @@ -621,9 +691,11 @@ translate_dst_region(const struct codegen *cg) * Translate the destination operand to DW1 of the 1-src/2-src format. */ static uint32_t -translate_dst(const struct codegen *cg) +translate_dst_gen6(const struct codegen *cg) { - return translate_dst_region(cg) << 16 | + ILO_DEV_ASSERT(cg->dev, 6, 7.5); + + return translate_dst_region_gen6(cg) << 16 | cg->src[1].type << 12 | cg->src[1].file << 10 | cg->src[0].type << 7 | @@ -632,15 +704,36 @@ translate_dst(const struct codegen *cg) cg->dst.file; } +static uint32_t +translate_dst_gen8(const struct codegen *cg) +{ + const bool indirect_origin_bit9 = (cg->dst.indirect) ? + (cg->dst.origin & 0x200) : 0; + + ILO_DEV_ASSERT(cg->dev, 8, 8); + + return translate_dst_region_gen6(cg) << 16 | + indirect_origin_bit9 << 15 | + cg->src[0].type << 11 | + cg->src[0].file << 9 | + cg->dst.type << 5 | + cg->dst.file << 3 | + cg->inst->mask_ctrl << 2 | + cg->flag_reg_num << 1 | + cg->flag_sub_reg_num; +} + /** * Translate the instruction to DW0 of the 1-src/2-src format. */ static uint32_t -translate_inst(const struct codegen *cg) +translate_inst_gen6(const struct codegen *cg) { const bool debug_ctrl = false; const bool cmpt_ctrl = false; + ILO_DEV_ASSERT(cg->dev, 6, 7.5); + assert(cg->inst->opcode < 128); return cg->inst->saturate << 31 | @@ -659,16 +752,49 @@ translate_inst(const struct codegen *cg) cg->inst->opcode; } +static uint32_t +translate_inst_gen8(const struct codegen *cg) +{ + const bool debug_ctrl = false; + const bool cmpt_ctrl = false; + + ILO_DEV_ASSERT(cg->dev, 8, 8); + + assert(cg->inst->opcode < 128); + + return cg->inst->saturate << 31 | + debug_ctrl << 30 | + cmpt_ctrl << 29 | + cg->inst->acc_wr_ctrl << 28 | + cg->inst->cond_modifier << 24 | + cg->inst->exec_size << 21 | + cg->inst->pred_inv << 20 | + cg->inst->pred_ctrl << 16 | + cg->inst->thread_ctrl << 14 | + cg->inst->qtr_ctrl << 12 | + cg->inst->dep_ctrl << 9 | + cg->inst->access_mode << 8 | + cg->inst->opcode; +} + /** * Codegen an instruction in 1-src/2-src format. */ static void -codegen_inst(const struct codegen *cg, uint32_t *code) +codegen_inst_gen6(const struct codegen *cg, uint32_t *code) { - code[0] = translate_inst(cg); - code[1] = translate_dst(cg); - code[2] = translate_src(cg, 0); - code[3] = translate_src(cg, 1); + ILO_DEV_ASSERT(cg->dev, 6, 8); + + if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { + code[0] = translate_inst_gen8(cg); + code[1] = translate_dst_gen8(cg); + } else { + code[0] = translate_inst_gen6(cg); + code[1] = translate_dst_gen6(cg); + } + + code[2] = translate_src_gen6(cg, 0); + code[3] = translate_src_gen6(cg, 1); assert(src_is_null(cg, 2)); } @@ -676,13 +802,18 @@ codegen_inst(const struct codegen *cg, uint32_t *code) * Codegen an instruction in 3-src format. */ static void -codegen_inst_3src(const struct codegen *cg, uint32_t *code) +codegen_inst_3src_gen6(const struct codegen *cg, uint32_t *code) { const struct codegen_dst *dst = &cg->dst; uint32_t dw0, dw1, dw_src[3]; int i; - dw0 = translate_inst(cg); + ILO_DEV_ASSERT(cg->dev, 6, 8); + + if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) + dw0 = translate_inst_gen8(cg); + else + dw0 = translate_inst_gen6(cg); /* * 3-src instruction restrictions @@ -697,30 +828,42 @@ codegen_inst_3src(const struct codegen *cg, uint32_t *code) assert(cg->inst->access_mode == GEN6_ALIGN_16); assert(!dst->indirect); - assert((dst->file == GEN6_FILE_GRF && - CG_REG_NUM(dst->origin) < 128) || - (dst->file == GEN6_FILE_MRF && - CG_REG_NUM(dst->origin) < 16)); + assert((dst->file == GEN6_FILE_GRF && CG_REG_NUM(dst->origin) < 128) || + (dst->file == GEN6_FILE_MRF && CG_REG_NUM(dst->origin) < 16)); assert(!(dst->origin & 0x3)); assert(dst->horz_stride == GEN6_HORZSTRIDE_1); - dw1 = dst->origin << 19 | - dst->writemask << 17 | - cg->src[2].negate << 9 | - cg->src[2].absolute << 8 | - cg->src[1].negate << 7 | - cg->src[1].absolute << 6 | - cg->src[0].negate << 5 | - cg->src[0].absolute << 4 | - cg->flag_sub_reg_num << 1 | - (dst->file == GEN6_FILE_MRF); + if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { + dw1 = dst->origin << 19 | + dst->writemask << 17 | + cg->src[2].negate << 10 | + cg->src[2].negate << 10 | + cg->src[2].absolute << 9 | + cg->src[1].negate << 8 | + cg->src[1].absolute << 7 | + cg->src[0].negate << 6 | + cg->src[0].absolute << 5 | + cg->inst->mask_ctrl << 2 | + cg->flag_reg_num << 1 | + cg->flag_sub_reg_num; + } else { + dw1 = dst->origin << 19 | + dst->writemask << 17 | + cg->src[2].negate << 9 | + cg->src[2].absolute << 8 | + cg->src[1].negate << 7 | + cg->src[1].absolute << 6 | + cg->src[0].negate << 5 | + cg->src[0].absolute << 4 | + cg->flag_sub_reg_num << 1 | + (dst->file == GEN6_FILE_MRF); + } for (i = 0; i < 3; i++) { const struct codegen_src *src = &cg->src[i]; assert(!src->indirect); - assert(src->file == GEN6_FILE_GRF && - CG_REG_NUM(src->origin) < 128); + assert(src->file == GEN6_FILE_GRF && CG_REG_NUM(src->origin) < 128); assert(!(src->origin & 0x3)); assert((src->vert_stride == GEN6_VERTSTRIDE_4 && @@ -881,14 +1024,16 @@ translate_swizzle(enum toy_swizzle swizzle) * Prepare for generating an instruction. */ static void -codegen_prepare(struct codegen *cg, const struct toy_inst *inst, - int pc, int rect_linear_width) +codegen_prepare(struct codegen *cg, const struct ilo_dev_info *dev, + const struct toy_inst *inst, int pc, int rect_linear_width) { int i; + cg->dev = dev; cg->inst = inst; cg->pc = pc; + cg->flag_reg_num = 0; cg->flag_sub_reg_num = 0; cg->dst.file = translate_vfile(inst->dst.file); @@ -1048,15 +1193,15 @@ toy_compiler_assemble(struct toy_compiler *tc, int *size) break; } - codegen_prepare(&cg, inst, pc, tc->rect_linear_width); + codegen_prepare(&cg, tc->dev, inst, pc, tc->rect_linear_width); codegen_validate_region_restrictions(&cg); switch (inst->opcode) { case GEN6_OPCODE_MAD: - codegen_inst_3src(&cg, dw); + codegen_inst_3src_gen6(&cg, dw); break; default: - codegen_inst(&cg, dw); + codegen_inst_gen6(&cg, dw); break; } diff --git a/src/gallium/drivers/ilo/shader/toy_compiler_disasm.c b/src/gallium/drivers/ilo/shader/toy_compiler_disasm.c index cd6e69b..1434736 100644 --- a/src/gallium/drivers/ilo/shader/toy_compiler_disasm.c +++ b/src/gallium/drivers/ilo/shader/toy_compiler_disasm.c @@ -39,7 +39,7 @@ struct disasm_printer { struct disasm_operand { unsigned file:2; - unsigned type:3; + unsigned type:4; unsigned addr_mode:1; unsigned reg:8; @@ -90,6 +90,8 @@ struct disasm_inst { unsigned fc:4; unsigned acc_wr_ctrl:1; + unsigned branch_ctrl:1; + unsigned cmpt_ctrl:1; unsigned debug_ctrl:1; unsigned saturate:1; @@ -147,6 +149,7 @@ static const struct { [GEN75_OPCODE_CALLA] = { "calla", 1 }, [GEN6_OPCODE_CALL] = { "call", 1 }, [GEN6_OPCODE_RETURN] = { "return", 1 }, + [GEN8_OPCODE_GOTO] = { "goto", 1 }, [GEN6_OPCODE_WAIT] = { "wait", 1 }, [GEN6_OPCODE_SEND] = { "send", 1 }, [GEN6_OPCODE_SENDC] = { "sendc", 1 }, @@ -183,6 +186,8 @@ static const struct { static void disasm_inst_decode_dw0_gen6(struct disasm_inst *inst, uint32_t dw0) { + ILO_DEV_ASSERT(inst->dev, 6, 8); + inst->opcode = GEN_EXTRACT(dw0, GEN6_INST_OPCODE); switch (inst->opcode) { @@ -190,6 +195,10 @@ disasm_inst_decode_dw0_gen6(struct disasm_inst *inst, uint32_t dw0) inst->has_jip = true; inst->has_uip = (ilo_dev_gen(inst->dev) >= ILO_GEN(7)); break; + case GEN6_OPCODE_ELSE: + inst->has_jip = true; + inst->has_uip = (ilo_dev_gen(inst->dev) >= ILO_GEN(8)); + break; case GEN6_OPCODE_BREAK: case GEN6_OPCODE_CONT: case GEN6_OPCODE_HALT: @@ -198,7 +207,6 @@ disasm_inst_decode_dw0_gen6(struct disasm_inst *inst, uint32_t dw0) case GEN6_OPCODE_JMPI: case GEN7_OPCODE_BRD: case GEN7_OPCODE_BRC: - case GEN6_OPCODE_ELSE: case GEN6_OPCODE_ENDIF: case GEN6_OPCODE_CASE: case GEN6_OPCODE_WHILE: @@ -212,8 +220,15 @@ disasm_inst_decode_dw0_gen6(struct disasm_inst *inst, uint32_t dw0) } inst->access_mode = GEN_EXTRACT(dw0, GEN6_INST_ACCESSMODE); - inst->mask_ctrl = GEN_EXTRACT(dw0, GEN6_INST_MASKCTRL); - inst->dep_ctrl = GEN_EXTRACT(dw0, GEN6_INST_DEPCTRL); + + if (ilo_dev_gen(inst->dev) >= ILO_GEN(8)) { + inst->dep_ctrl = GEN_EXTRACT(dw0, GEN8_INST_DEPCTRL); + inst->nib_ctrl = (bool) (dw0 & GEN8_INST_NIBCTRL); + } else { + inst->mask_ctrl = GEN_EXTRACT(dw0, GEN6_INST_MASKCTRL); + inst->dep_ctrl = GEN_EXTRACT(dw0, GEN6_INST_DEPCTRL); + } + inst->qtr_ctrl = GEN_EXTRACT(dw0, GEN6_INST_QTRCTRL); inst->thread_ctrl = GEN_EXTRACT(dw0, GEN6_INST_THREADCTRL); inst->pred_ctrl = GEN_EXTRACT(dw0, GEN6_INST_PREDCTRL); @@ -235,7 +250,21 @@ disasm_inst_decode_dw0_gen6(struct disasm_inst *inst, uint32_t dw0) break; } - inst->acc_wr_ctrl = (bool) (dw0 & GEN6_INST_ACCWRCTRL); + if (ilo_dev_gen(inst->dev) >= ILO_GEN(8)) { + switch (inst->opcode) { + case GEN6_OPCODE_IF: + case GEN6_OPCODE_ELSE: + case GEN8_OPCODE_GOTO: + inst->branch_ctrl = (bool) (dw0 & GEN8_INST_BRANCHCTRL); + break; + default: + inst->acc_wr_ctrl = (bool) (dw0 & GEN6_INST_ACCWRCTRL); + break; + } + } else { + inst->acc_wr_ctrl = (bool) (dw0 & GEN6_INST_ACCWRCTRL); + } + inst->cmpt_ctrl = (bool) (dw0 & GEN6_INST_CMPTCTRL); inst->debug_ctrl = (bool) (dw0 & GEN6_INST_DEBUGCTRL); inst->saturate = (bool) (dw0 & GEN6_INST_SATURATE); @@ -251,15 +280,28 @@ disasm_inst_jip_in_dw1_high_gen6(const struct disasm_inst *inst) static void disasm_inst_decode_dw1_gen6(struct disasm_inst *inst, uint32_t dw1) { - inst->dst.base.file = GEN_EXTRACT(dw1, GEN6_INST_DST_FILE); - inst->dst.base.type = GEN_EXTRACT(dw1, GEN6_INST_DST_TYPE); - inst->src0.base.file = GEN_EXTRACT(dw1, GEN6_INST_SRC0_FILE); - inst->src0.base.type = GEN_EXTRACT(dw1, GEN6_INST_SRC0_TYPE); - inst->src1.base.file = GEN_EXTRACT(dw1, GEN6_INST_SRC1_FILE); - inst->src1.base.type = GEN_EXTRACT(dw1, GEN6_INST_SRC1_TYPE); + ILO_DEV_ASSERT(inst->dev, 6, 8); - if (ilo_dev_gen(inst->dev) >= ILO_GEN(7)) - inst->nib_ctrl = (bool) (dw1 & GEN7_INST_NIBCTRL); + if (ilo_dev_gen(inst->dev) >= ILO_GEN(8)) { + inst->flag_subreg = GEN_EXTRACT(dw1, GEN8_INST_FLAG_SUBREG); + inst->flag_reg = GEN_EXTRACT(dw1, GEN8_INST_FLAG_REG); + inst->mask_ctrl = GEN_EXTRACT(dw1, GEN8_INST_MASKCTRL); + + inst->dst.base.file = GEN_EXTRACT(dw1, GEN8_INST_DST_FILE); + inst->dst.base.type = GEN_EXTRACT(dw1, GEN8_INST_DST_TYPE); + inst->src0.base.file = GEN_EXTRACT(dw1, GEN8_INST_SRC0_FILE); + inst->src0.base.type = GEN_EXTRACT(dw1, GEN8_INST_SRC0_TYPE); + } else { + inst->dst.base.file = GEN_EXTRACT(dw1, GEN6_INST_DST_FILE); + inst->dst.base.type = GEN_EXTRACT(dw1, GEN6_INST_DST_TYPE); + inst->src0.base.file = GEN_EXTRACT(dw1, GEN6_INST_SRC0_FILE); + inst->src0.base.type = GEN_EXTRACT(dw1, GEN6_INST_SRC0_TYPE); + inst->src1.base.file = GEN_EXTRACT(dw1, GEN6_INST_SRC1_FILE); + inst->src1.base.type = GEN_EXTRACT(dw1, GEN6_INST_SRC1_TYPE); + + if (ilo_dev_gen(inst->dev) >= ILO_GEN(7)) + inst->nib_ctrl = (bool) (dw1 & GEN7_INST_NIBCTRL); + } if (disasm_inst_jip_in_dw1_high_gen6(inst)) { inst->u.imm32 = dw1 >> 16; @@ -279,14 +321,34 @@ disasm_inst_decode_dw1_gen6(struct disasm_inst *inst, uint32_t dw1) GEN6_INST_DST_SUBREG_ALIGN16__SHR; } } else { - inst->dst.base.addr_subreg = GEN_EXTRACT(dw1, GEN6_INST_DST_ADDR_SUBREG); + if (ilo_dev_gen(inst->dev) >= ILO_GEN(8)) { + inst->dst.base.addr_subreg = + GEN_EXTRACT(dw1, GEN8_INST_DST_ADDR_SUBREG); - if (inst->access_mode == GEN6_ALIGN_1) { - inst->dst.base.addr_imm = GEN_EXTRACT(dw1, GEN6_INST_DST_ADDR_IMM); + if (inst->access_mode == GEN6_ALIGN_1) { + inst->dst.base.addr_imm = + GEN_EXTRACT(dw1, GEN8_INST_DST_ADDR_IMM); + } else { + inst->dst.base.addr_imm = GEN_EXTRACT(dw1, + GEN8_INST_DST_ADDR_IMM_ALIGN16) << + GEN8_INST_DST_ADDR_IMM_ALIGN16__SHR; + } + + inst->dst.base.addr_imm |= GEN_EXTRACT(dw1, + GEN8_INST_DST_ADDR_IMM_BIT9) << + GEN8_INST_DST_ADDR_IMM_BIT9__SHR; } else { - inst->dst.base.addr_imm = GEN_EXTRACT(dw1, - GEN6_INST_DST_ADDR_IMM_ALIGN16) << - GEN6_INST_DST_ADDR_IMM_ALIGN16__SHR; + inst->dst.base.addr_subreg = + GEN_EXTRACT(dw1, GEN6_INST_DST_ADDR_SUBREG); + + if (inst->access_mode == GEN6_ALIGN_1) { + inst->dst.base.addr_imm = + GEN_EXTRACT(dw1, GEN6_INST_DST_ADDR_IMM); + } else { + inst->dst.base.addr_imm = GEN_EXTRACT(dw1, + GEN6_INST_DST_ADDR_IMM_ALIGN16) << + GEN6_INST_DST_ADDR_IMM_ALIGN16__SHR; + } } } @@ -304,10 +366,16 @@ disasm_inst_decode_dw2_dw3_gen6(struct disasm_inst *inst, { int count, i; - if (ilo_dev_gen(inst->dev) >= ILO_GEN(7)) - inst->flag_reg = GEN_EXTRACT(dw2, GEN7_INST_FLAG_REG); + ILO_DEV_ASSERT(inst->dev, 6, 8); - inst->flag_subreg = GEN_EXTRACT(dw2, GEN6_INST_FLAG_SUBREG); + if (ilo_dev_gen(inst->dev) >= ILO_GEN(8)) { + inst->src1.base.file = GEN_EXTRACT(dw2, GEN8_INST_SRC1_FILE); + inst->src1.base.type = GEN_EXTRACT(dw2, GEN8_INST_SRC1_TYPE); + } else { + if (ilo_dev_gen(inst->dev) >= ILO_GEN(7)) + inst->flag_reg = GEN_EXTRACT(dw2, GEN7_INST_FLAG_REG); + inst->flag_subreg = GEN_EXTRACT(dw2, GEN6_INST_FLAG_SUBREG); + } if (inst->src0.base.file == GEN6_FILE_IMM || inst->src1.base.file == GEN6_FILE_IMM) { @@ -334,14 +402,38 @@ disasm_inst_decode_dw2_dw3_gen6(struct disasm_inst *inst, GEN6_INST_SRC_SUBREG_ALIGN16__SHR; } } else { - src->base.addr_subreg = GEN_EXTRACT(dw, GEN6_INST_SRC_ADDR_SUBREG); - - if (inst->access_mode == GEN6_ALIGN_1) { - src->base.addr_imm = GEN_EXTRACT(dw, GEN6_INST_SRC_ADDR_IMM); + if (ilo_dev_gen(inst->dev) >= ILO_GEN(8)) { + src->base.addr_subreg = + GEN_EXTRACT(dw, GEN8_INST_SRC_ADDR_SUBREG); + + if (inst->access_mode == GEN6_ALIGN_1) { + src->base.addr_imm = GEN_EXTRACT(dw, GEN8_INST_SRC_ADDR_IMM); + } else { + src->base.addr_imm = + GEN_EXTRACT(dw, GEN8_INST_SRC_ADDR_IMM_ALIGN16) << + GEN8_INST_SRC_ADDR_IMM_ALIGN16__SHR; + } + + if (i == 0) { + inst->dst.base.addr_imm |= GEN_EXTRACT(dw, + GEN8_INST_SRC0_ADDR_IMM_BIT9) << + GEN8_INST_SRC0_ADDR_IMM_BIT9__SHR; + } else { + inst->dst.base.addr_imm |= GEN_EXTRACT(dw, + GEN8_INST_SRC1_ADDR_IMM_BIT9) << + GEN8_INST_SRC1_ADDR_IMM_BIT9__SHR; + } } else { - src->base.addr_imm = - GEN_EXTRACT(dw, GEN6_INST_SRC_ADDR_IMM_ALIGN16) << - GEN6_INST_SRC_ADDR_IMM_ALIGN16__SHR; + src->base.addr_subreg = + GEN_EXTRACT(dw, GEN6_INST_SRC_ADDR_SUBREG); + + if (inst->access_mode == GEN6_ALIGN_1) { + src->base.addr_imm = GEN_EXTRACT(dw, GEN6_INST_SRC_ADDR_IMM); + } else { + src->base.addr_imm = + GEN_EXTRACT(dw, GEN6_INST_SRC_ADDR_IMM_ALIGN16) << + GEN6_INST_SRC_ADDR_IMM_ALIGN16__SHR; + } } } @@ -373,13 +465,15 @@ disasm_inst_decode_dw2_dw3_gen6(struct disasm_inst *inst, static void disasm_inst_decode_3src_dw1_gen6(struct disasm_inst *inst, uint32_t dw1) { - static unsigned type_mapping[4] = { + static const unsigned type_mapping[4] = { [GEN7_TYPE_F_3SRC] = GEN6_TYPE_F, [GEN7_TYPE_D_3SRC] = GEN6_TYPE_D, [GEN7_TYPE_UD_3SRC] = GEN6_TYPE_UD, [GEN7_TYPE_DF_3SRC] = GEN7_TYPE_DF, }; + ILO_DEV_ASSERT(inst->dev, 6, 7.5); + inst->flag_subreg = GEN_EXTRACT(dw1, GEN6_3SRC_FLAG_SUBREG); if (ilo_dev_gen(inst->dev) >= ILO_GEN(7)) { @@ -425,12 +519,63 @@ disasm_inst_decode_3src_dw1_gen6(struct disasm_inst *inst, uint32_t dw1) } static void +disasm_inst_decode_3src_dw1_gen8(struct disasm_inst *inst, uint32_t dw1) +{ + static const unsigned type_mapping[8] = { + [GEN7_TYPE_F_3SRC] = GEN6_TYPE_F, + [GEN7_TYPE_D_3SRC] = GEN6_TYPE_D, + [GEN7_TYPE_UD_3SRC] = GEN6_TYPE_UD, + [GEN7_TYPE_DF_3SRC] = GEN7_TYPE_DF, + /* map unknown types to unknown types */ + [0x4] = 0xf, + [0x5] = 0xf, + [0x6] = 0xf, + [0x7] = 0xf, + }; + + ILO_DEV_ASSERT(inst->dev, 8, 8); + + inst->flag_subreg = GEN_EXTRACT(dw1, GEN8_3SRC_FLAG_SUBREG); + inst->flag_reg = GEN_EXTRACT(dw1, GEN8_3SRC_FLAG_REG); + inst->mask_ctrl = GEN_EXTRACT(dw1, GEN8_3SRC_MASKCTRL); + inst->src0.absolute = (bool) (dw1 & GEN8_3SRC_SRC0_ABSOLUTE); + inst->src0.negate = (bool) (dw1 & GEN8_3SRC_SRC0_NEGATE); + inst->src1.negate = (bool) (dw1 & GEN8_3SRC_SRC1_NEGATE); + inst->src1.absolute = (bool) (dw1 & GEN8_3SRC_SRC1_ABSOLUTE); + inst->u.src2.negate = (bool) (dw1 & GEN8_3SRC_SRC2_NEGATE); + inst->u.src2.absolute = (bool) (dw1 & GEN8_3SRC_SRC2_ABSOLUTE); + + inst->src0.base.file = GEN6_FILE_GRF; + inst->src0.base.type = GEN_EXTRACT(dw1, GEN8_3SRC_SRC_TYPE); + inst->src0.base.type = type_mapping[inst->src0.base.type]; + + inst->src1.base.file = GEN6_FILE_GRF; + inst->src1.base.type = inst->src0.base.type; + + inst->u.src2.base.file = GEN6_FILE_GRF; + inst->u.src2.base.type = inst->src0.base.type; + + inst->dst.base.file = GEN6_FILE_GRF; + inst->dst.base.type = GEN_EXTRACT(dw1, GEN8_3SRC_DST_TYPE); + inst->dst.base.type = type_mapping[inst->dst.base.type]; + inst->dst.base.addr_mode = GEN6_ADDRMODE_DIRECT; + inst->dst.horz_stride = GEN6_HORZSTRIDE_1; + + inst->dst.writemask = GEN_EXTRACT(dw1, GEN6_3SRC_DST_WRITEMASK); + inst->dst.base.subreg = GEN_EXTRACT(dw1, GEN6_3SRC_DST_SUBREG) << + GEN6_3SRC_DST_SUBREG__SHR; + inst->dst.base.reg = GEN_EXTRACT(dw1, GEN6_3SRC_DST_REG); +} + +static void disasm_inst_decode_3src_dw2_dw3_gen6(struct disasm_inst *inst, uint32_t dw2, uint32_t dw3) { const uint64_t qw = (uint64_t) dw3 << 32 | dw2; int i; + ILO_DEV_ASSERT(inst->dev, 6, 8); + for (i = 0; i < 3; i++) { struct disasm_src_operand *src = (i == 0) ? &inst->src0 : (i == 1) ? &inst->src1 : @@ -499,7 +644,10 @@ disasm_inst_decode(struct disasm_inst *inst, disasm_inst_decode_dw0_gen6(inst, dw[0]); if (disasm_opcode_table[inst->opcode].src_count == 3) { - disasm_inst_decode_3src_dw1_gen6(inst, dw[1]); + if (ilo_dev_gen(inst->dev) >= ILO_GEN(8)) + disasm_inst_decode_3src_dw1_gen8(inst, dw[1]); + else + disasm_inst_decode_3src_dw1_gen6(inst, dw[1]); disasm_inst_decode_3src_dw2_dw3_gen6(inst, dw[2], dw[3]); } else { disasm_inst_decode_dw1_gen6(inst, dw[1]); @@ -584,6 +732,8 @@ disasm_inst_fc(const struct disasm_inst *inst) case GEN6_MATH_INT_DIV: return "int_div"; case GEN6_MATH_INT_DIV_QUOTIENT: return "int_div_quotient"; case GEN6_MATH_INT_DIV_REMAINDER: return "int_div_remainder"; + case GEN8_MATH_INVM: return "invm"; + case GEN8_MATH_RSQRTM: return "rsqrtm"; default: return "BAD"; } } @@ -785,6 +935,8 @@ disasm_inst_type(const struct disasm_inst *inst, case GEN6_TYPE_VF_IMM: return "VF"; case GEN6_TYPE_V_IMM: return "V"; case GEN6_TYPE_F: return "F"; + case GEN8_TYPE_DF_IMM: return "DF"; + case GEN8_TYPE_HF_IMM: return "HF"; default: return "BAD"; } } else { @@ -797,6 +949,9 @@ disasm_inst_type(const struct disasm_inst *inst, case GEN6_TYPE_B: return "B"; case GEN7_TYPE_DF: return "DF"; case GEN6_TYPE_F: return "F"; + case GEN8_TYPE_UQ: return "UQ"; + case GEN8_TYPE_Q: return "Q"; + case GEN8_TYPE_HF: return "HF"; default: return "BAD"; } } @@ -870,6 +1025,19 @@ disasm_inst_writemask(const struct disasm_inst *inst, unsigned writemask) static const char * disasm_inst_negate(const struct disasm_inst *inst, bool negate) { + if (ilo_dev_gen(inst->dev) >= ILO_GEN(8)) { + switch (inst->opcode) { + case GEN6_OPCODE_AND: + case GEN6_OPCODE_NOT: + case GEN6_OPCODE_OR: + case GEN6_OPCODE_XOR: + return (negate) ? "~" : ""; + break; + default: + break; + } + } + return (negate) ? "-" : ""; } @@ -1025,7 +1193,7 @@ static const char * disasm_inst_mdesc_dp_op_gen75(const struct disasm_inst *inst, int sfid, int op) { - ILO_DEV_ASSERT(inst->dev, 7.5, 7.5); + ILO_DEV_ASSERT(inst->dev, 7.5, 8); switch (sfid) { case GEN6_SFID_DP_SAMPLER: @@ -1090,6 +1258,7 @@ static const char * disasm_inst_mdesc_dp_op(const struct disasm_inst *inst, int sfid, int op) { switch (ilo_dev_gen(inst->dev)) { + case ILO_GEN(8): case ILO_GEN(7.5): return disasm_inst_mdesc_dp_op_gen75(inst, sfid, op); case ILO_GEN(7): return disasm_inst_mdesc_dp_op_gen7(inst, sfid, op); case ILO_GEN(6): return disasm_inst_mdesc_dp_op_gen6(inst, sfid, op); @@ -1429,7 +1598,9 @@ disasm_printer_add_mdesc_urb(struct disasm_printer *printer, op = GEN_EXTRACT(mdesc, GEN7_MSG_URB_OP); offset = GEN_EXTRACT(mdesc, GEN7_MSG_URB_GLOBAL_OFFSET); interleaved = mdesc & GEN7_MSG_URB_INTERLEAVED; - complete = mdesc & GEN7_MSG_URB_COMPLETE; + + complete = (ilo_dev_gen(inst->dev) >= ILO_GEN(8)) ? + false : (mdesc & GEN7_MSG_URB_COMPLETE); allocate = false; used = false; @@ -1701,6 +1872,84 @@ disasm_printer_print_inst(struct disasm_printer *printer, } static void +disasm_uncompact_3src(const struct ilo_dev_info *dev, + uint64_t compact, uint32_t *dw) +{ + const struct toy_compaction_table *tbl = + toy_compiler_get_compaction_table(dev); + uint32_t src[3], tmp; + uint64_t tmp64; + + ILO_DEV_ASSERT(dev, 8, 8); + + tmp = GEN_EXTRACT(compact, GEN8_COMPACT_3SRC_OPCODE); + dw[0] = GEN_SHIFT32(tmp, GEN6_INST_OPCODE); + + /* ControlIndex */ + tmp = GEN_EXTRACT(compact, GEN8_COMPACT_3SRC_CONTROL_INDEX); + tmp = tbl->control_3src[tmp]; + + dw[0] |= (tmp & 0x1fffff) << GEN6_INST_ACCESSMODE__SHIFT; + dw[1] = (tmp >> 21) & ((ilo_dev_gen(dev) >= ILO_GEN(9)) ? 0x1f : 0x7); + + /* SourceIndex */ + tmp = GEN_EXTRACT(compact, GEN8_COMPACT_3SRC_SOURCE_INDEX); + tmp64 = tbl->source_3src[tmp]; + + dw[1] |= (tmp64 & 0x7ffff) << 5; + src[0] = ((tmp64 >> 19) & 0xff) << 1; + src[1] = ((tmp64 >> 27) & 0xff) << 1; + src[2] = ((tmp64 >> 35) & 0xff) << 1; + if (ilo_dev_gen(dev) >= ILO_GEN(9)) { + src[0] |= ((tmp64 >> 43) & 0x3) << 19; + src[1] |= ((tmp64 >> 45) & 0x3) << 19; + src[2] |= ((tmp64 >> 47) & 0x3) << 19; + } else { + src[0] |= ((tmp64 >> 43) & 0x1) << 19; + src[1] |= ((tmp64 >> 44) & 0x1) << 19; + src[2] |= ((tmp64 >> 45) & 0x1) << 19; + } + + tmp = GEN_EXTRACT(compact, GEN8_COMPACT_3SRC_DST_REG); + dw[1] |= GEN_SHIFT32(tmp, GEN6_3SRC_DST_REG); + + if (compact & GEN8_COMPACT_3SRC_SRC0_REPCTRL) + src[0] |= GEN6_3SRC_SRC_REPCTRL; + + assert(compact & GEN8_COMPACT_3SRC_CMPTCTRL); + + if (compact & GEN8_COMPACT_3SRC_DEBUGCTRL) + dw[0] |= GEN6_INST_DEBUGCTRL; + if (compact & GEN8_COMPACT_3SRC_SATURATE) + dw[0] |= GEN6_INST_SATURATE; + + if (compact & GEN8_COMPACT_3SRC_SRC1_REPCTRL) + src[1] |= GEN6_3SRC_SRC_REPCTRL; + if (compact & GEN8_COMPACT_3SRC_SRC2_REPCTRL) + src[2] |= GEN6_3SRC_SRC_REPCTRL; + + tmp = GEN_EXTRACT(compact, GEN8_COMPACT_3SRC_SRC0_SUBREG); + src[0] |= GEN_SHIFT32(tmp, GEN6_3SRC_SRC_SUBREG); + tmp = GEN_EXTRACT(compact, GEN8_COMPACT_3SRC_SRC1_SUBREG); + src[1] |= GEN_SHIFT32(tmp, GEN6_3SRC_SRC_SUBREG); + tmp = GEN_EXTRACT(compact, GEN8_COMPACT_3SRC_SRC2_SUBREG); + src[2] |= GEN_SHIFT32(tmp, GEN6_3SRC_SRC_SUBREG); + + tmp = GEN_EXTRACT(compact, GEN8_COMPACT_3SRC_SRC0_REG); + src[0] |= GEN_SHIFT32(tmp, GEN6_3SRC_SRC_REG); + tmp = GEN_EXTRACT(compact, GEN8_COMPACT_3SRC_SRC1_REG); + src[1] |= GEN_SHIFT32(tmp, GEN6_3SRC_SRC_REG); + tmp = GEN_EXTRACT(compact, GEN8_COMPACT_3SRC_SRC2_REG); + src[2] |= GEN_SHIFT32(tmp, GEN6_3SRC_SRC_REG); + + tmp64 = (uint64_t) src[2] << 42 | + (uint64_t) src[1] << 21 | + (uint64_t) src[0]; + dw[2] = (uint32_t) tmp64; + dw[3] = (uint32_t) (tmp64 >> 32); +} + +static void disasm_uncompact(const struct ilo_dev_info *dev, uint64_t compact, uint32_t *dw) { @@ -1709,9 +1958,16 @@ disasm_uncompact(const struct ilo_dev_info *dev, bool src_is_imm; uint32_t tmp; - memset(dw, 0, sizeof(*dw) * 4); + ILO_DEV_ASSERT(dev, 6, 8); tmp = GEN_EXTRACT(compact, GEN6_COMPACT_OPCODE); + if (disasm_opcode_table[tmp].src_count == 3) { + disasm_uncompact_3src(dev, compact, dw); + return; + } + + memset(dw, 0, sizeof(*dw) * 4); + dw[0] |= GEN_SHIFT32(tmp, GEN6_INST_OPCODE); if (ilo_dev_gen(dev) >= ILO_GEN(7) && (compact & GEN6_COMPACT_DEBUGCTRL)) diff --git a/src/gallium/drivers/ilo/shader/toy_compiler_reg.h b/src/gallium/drivers/ilo/shader/toy_compiler_reg.h index 36674f8..70ad3fc 100644 --- a/src/gallium/drivers/ilo/shader/toy_compiler_reg.h +++ b/src/gallium/drivers/ilo/shader/toy_compiler_reg.h @@ -118,13 +118,13 @@ enum toy_writemask { */ struct toy_dst { unsigned file:3; /* TOY_FILE_x */ - unsigned type:3; /* TOY_TYPE_x */ + unsigned type:4; /* TOY_TYPE_x */ unsigned rect:3; /* TOY_RECT_x */ unsigned indirect:1; /* true or false */ unsigned indirect_subreg:6; /* which subreg of a0? */ unsigned writemask:4; /* TOY_WRITEMASK_x */ - unsigned pad:12; + unsigned pad:11; uint32_t val32; }; @@ -134,7 +134,7 @@ struct toy_dst { */ struct toy_src { unsigned file:3; /* TOY_FILE_x */ - unsigned type:3; /* TOY_TYPE_x */ + unsigned type:4; /* TOY_TYPE_x */ unsigned rect:3; /* TOY_RECT_x */ unsigned indirect:1; /* true or false */ unsigned indirect_subreg:6; /* which subreg of a0? */ @@ -145,7 +145,7 @@ struct toy_src { unsigned swizzle_w:2; /* TOY_SWIZZLE_x */ unsigned absolute:1; /* true or false */ unsigned negate:1; /* true or false */ - unsigned pad:6; + unsigned pad:5; uint32_t val32; }; diff --git a/src/gallium/drivers/ilo/shader/toy_helpers.h b/src/gallium/drivers/ilo/shader/toy_helpers.h index ec6acad..0c507d2 100644 --- a/src/gallium/drivers/ilo/shader/toy_helpers.h +++ b/src/gallium/drivers/ilo/shader/toy_helpers.h @@ -264,7 +264,14 @@ tsrc_imm_mdesc_urb(const struct toy_compiler *tc, const bool header_present = true; uint32_t ctrl; - if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) { + if (ilo_dev_gen(tc->dev) >= ILO_GEN(8)) { + const bool per_slot_offset = false; + + ctrl = per_slot_offset << 17 | + swizzle_control << 15 | + global_offset << 4 | + urb_opcode; + } else if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) { const bool per_slot_offset = false; ctrl = per_slot_offset << 16 | @@ -272,8 +279,7 @@ tsrc_imm_mdesc_urb(const struct toy_compiler *tc, swizzle_control << 14 | global_offset << 3 | urb_opcode; - } - else { + } else { ctrl = complete << 15 | used << 14 | allocate << 13 | -- 2.7.4