From f3805c5f097f0d2bc655f35f9531aaa91813c225 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Fri, 24 Jun 2016 12:31:56 +1000 Subject: [PATCH] i965/vec4: add packing support for tcs load outputs Reviewed-by: Kenneth Graunke Reviewed-by: Edward O'Callaghan --- src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 17 +++++++++++++---- src/mesa/drivers/dri/i965/brw_vec4_tcs.h | 1 + src/mesa/drivers/dri/i965/brw_vec4_tes.cpp | 6 +++--- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index 4bc3be7..30c81c5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -201,6 +201,7 @@ vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst, void vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst, unsigned base_offset, + unsigned first_component, const src_reg &indirect_offset) { vec4_instruction *inst; @@ -216,6 +217,12 @@ vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst, read->offset = base_offset; read->mlen = 1; read->base_mrf = -1; + + if (first_component) { + src_reg src = src_reg(dst); + src.swizzle = BRW_SWZ_COMP_INPUT(first_component); + emit(MOV(dst, src)); + } } void @@ -295,14 +302,15 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case GL_QUADS: { /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */ dst_reg tmp(this, glsl_type::vec4_type); - emit_output_urb_read(tmp, 0, src_reg()); + emit_output_urb_read(tmp, 0, 0, src_reg()); emit(MOV(writemask(dst, WRITEMASK_XY), swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX))); break; } case GL_TRIANGLES: /* DWord 4; use offset 1 but normal swizzle/writemask. */ - emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, src_reg()); + emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, 0, + src_reg()); break; case GL_ISOLINES: /* All channels are undefined. */ @@ -334,10 +342,11 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } dst_reg tmp(this, glsl_type::vec4_type); - emit_output_urb_read(tmp, 1, src_reg()); + emit_output_urb_read(tmp, 1, 0, src_reg()); emit(MOV(dst, swizzle(src_reg(tmp), swiz))); } else { - emit_output_urb_read(dst, imm_offset, indirect_offset); + emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr), + indirect_offset); } break; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h index d408e56..030eb5e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h @@ -64,6 +64,7 @@ protected: const src_reg &indirect_offset); void emit_output_urb_read(const dst_reg &dst, unsigned base_offset, + unsigned first_component, const src_reg &indirect_offset); void emit_urb_write(const src_reg &value, unsigned writemask, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp index 8266a9d..226dcb4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp @@ -179,7 +179,7 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src_reg indirect_offset = get_indirect_offset(instr); dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); unsigned imm_offset = instr->const_index[0]; - unsigned fist_component = nir_intrinsic_component(instr); + unsigned first_component = nir_intrinsic_component(instr); src_reg header = input_read_header; if (indirect_offset.file != BAD_FILE) { @@ -193,7 +193,7 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) const unsigned max_push_slots = 24; if (imm_offset < max_push_slots) { src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type); - src.swizzle = BRW_SWZ_COMP_INPUT(fist_component); + src.swizzle = BRW_SWZ_COMP_INPUT(first_component); emit(MOV(dst, src)); prog_data->urb_read_length = @@ -210,7 +210,7 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; src_reg src = src_reg(temp); - src.swizzle = BRW_SWZ_COMP_INPUT(fist_component); + src.swizzle = BRW_SWZ_COMP_INPUT(first_component); /* Copy to target. We might end up with some funky writemasks landing * in here, but we really don't want them in the above pseudo-ops. -- 2.7.4