From 26c84794745463c24a35e76ea67674de5d066ff5 Mon Sep 17 00:00:00 2001 From: Chris Forbes Date: Sat, 29 Dec 2012 20:12:26 +1300 Subject: [PATCH] i965/fs: add support for ir_txf_ms on Gen6+ On Gen6, lower this to `ld` with lod=0 and an extra sample_index parameter. On Gen7, use `ld2dms`. We don't support CMS yet for multisample textures, so we just hardcode MCS=0. This is ignored for IMS and UMS surfaces. Note: If we do end up emitting specialized shaders based on the MSAA layout, we can emit a slightly shorter message here in the UMS case. Note: According to the PRM, `ld2dms` takes one more parameter, lod. However, it's always zero, and including it would make the message too long for SIMD16, so we just omit it. V2: Reworked completely, added support for Gen7. V3: - Introduce sample_index parameter rather than reusing lod - Remove spurious whitespace change - Clarify commit message V4: - Fix comment style - Emit SHADER_OPCODE_TXF_MS on Gen6. The opcode emitted by the previous revision was benignly wrong, since it lowers to `ld` anyway on this gen, but it was still wrong. 
Signed-off-by: Chris Forbes Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_fs.h | 6 ++- src/mesa/drivers/dri/i965/brw_fs_fp.cpp | 5 ++- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 56 +++++++++++++++++++++++----- 3 files changed, 54 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 17ef046..f7ccc79 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -356,9 +356,11 @@ public: fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, fs_reg shadow_comp, fs_reg lod, fs_reg lod2); fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, - fs_reg shadow_comp, fs_reg lod, fs_reg lod2); + fs_reg shadow_comp, fs_reg lod, fs_reg lod2, + fs_reg sample_index); fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, - fs_reg shadow_comp, fs_reg lod, fs_reg lod2); + fs_reg shadow_comp, fs_reg lod, fs_reg lod2, + fs_reg sample_index); fs_reg fix_math_operand(fs_reg src); fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp index 50e63da..3c0ba24 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp @@ -412,6 +412,7 @@ fs_visitor::emit_fragment_program_code() fs_reg dpdy; fs_reg coordinate = src[0]; fs_reg shadow_c; + fs_reg sample_index; switch (fpi->Opcode) { case OPCODE_TEX: @@ -500,9 +501,9 @@ fs_visitor::emit_fragment_program_code() fs_inst *inst; if (intel->gen >= 7) { - inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy); + inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index); } else if (intel->gen >= 5) { - inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy); + inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, 
dpdy, sample_index); } else { inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index de08b0b..92bc621 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -944,6 +944,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break; + default: + fail("unrecognized texture opcode"); } inst->base_mrf = base_mrf; inst->mlen = mlen; @@ -970,7 +972,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, */ fs_inst * fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, - fs_reg shadow_c, fs_reg lod, fs_reg lod2) + fs_reg shadow_c, fs_reg lod, fs_reg lod2, + fs_reg sample_index) { int mlen = 0; int base_mrf = 2; @@ -1068,11 +1071,19 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, break; case ir_txf: mlen = header_present + 4 * reg_width; - - emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), - lod)); + emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), lod)); inst = emit(SHADER_OPCODE_TXF, dst); break; + case ir_txf_ms: + mlen = header_present + 4 * reg_width; + + /* lod */ + emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), fs_reg(0))); + /* sample index */ + emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index)); + mlen += reg_width; + inst = emit(SHADER_OPCODE_TXF_MS, dst); + break; } inst->base_mrf = base_mrf; inst->mlen = mlen; @@ -1087,7 +1098,8 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, fs_inst * fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, - fs_reg shadow_c, fs_reg lod, fs_reg lod2) + fs_reg shadow_c, fs_reg lod, fs_reg lod2, + fs_reg sample_index) { int mlen = 0; int base_mrf 
= 2; @@ -1183,10 +1195,31 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, mlen += reg_width; } break; + case ir_txf_ms: + emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index)); + mlen += reg_width; + + /* constant zero MCS; we arrange to never actually have a compressed + * multisample surface here for now. TODO: issue ld_mcs to get this first, + * if we ever support texturing from compressed multisample surfaces + */ + emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0u))); + mlen += reg_width; + + /* there is no offsetting for this message; just copy in the integer + * texture coordinates + */ + for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), + coordinate)); + coordinate.reg_offset++; + mlen += reg_width; + } + break; } /* Set up the coordinate (except for cases where it was done above) */ - if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf) { + if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf && ir->op != ir_txf_ms) { for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate)); coordinate.reg_offset++; @@ -1202,6 +1235,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break; case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break; case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break; + case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst); break; case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break; } inst->base_mrf = base_mrf; @@ -1351,7 +1385,7 @@ fs_visitor::visit(ir_texture *ir) shadow_comparitor = this->result; } - fs_reg lod, lod2; + fs_reg lod, lod2, sample_index; switch (ir->op) { case ir_tex: break; @@ -1372,6 +1406,10 @@ fs_visitor::visit(ir_texture *ir) ir->lod_info.lod->accept(this); lod = this->result; break; + case ir_txf_ms: + 
ir->lod_info.sample_index->accept(this); + sample_index = this->result; + break; }; /* Writemasking doesn't eliminate channels on SIMD8 texture @@ -1381,10 +1419,10 @@ fs_visitor::visit(ir_texture *ir) if (intel->gen >= 7) { inst = emit_texture_gen7(ir, dst, coordinate, shadow_comparitor, - lod, lod2); + lod, lod2, sample_index); } else if (intel->gen >= 5) { inst = emit_texture_gen5(ir, dst, coordinate, shadow_comparitor, - lod, lod2); + lod, lod2, sample_index); } else { inst = emit_texture_gen4(ir, dst, coordinate, shadow_comparitor, lod, lod2); -- 2.7.4