i965/vec4: add packing support for tcs load outputs
author Timothy Arceri <timothy.arceri@collabora.com>
Fri, 24 Jun 2016 02:31:56 +0000 (12:31 +1000)
committer Timothy Arceri <timothy.arceri@collabora.com>
Thu, 21 Jul 2016 02:06:11 +0000 (12:06 +1000)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
src/mesa/drivers/dri/i965/brw_vec4_tcs.h
src/mesa/drivers/dri/i965/brw_vec4_tes.cpp

index 4bc3be7..30c81c5 100644 (file)
@@ -201,6 +201,7 @@ vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst,
 void
 vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst,
                                        unsigned base_offset,
+                                       unsigned first_component,
                                        const src_reg &indirect_offset)
 {
    vec4_instruction *inst;
@@ -216,6 +217,12 @@ vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst,
    read->offset = base_offset;
    read->mlen = 1;
    read->base_mrf = -1;
+
+   if (first_component) {
+      src_reg src = src_reg(dst);
+      src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
+      emit(MOV(dst, src));
+   }
 }
 
 void
@@ -295,14 +302,15 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          case GL_QUADS: {
             /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */
             dst_reg tmp(this, glsl_type::vec4_type);
-            emit_output_urb_read(tmp, 0, src_reg());
+            emit_output_urb_read(tmp, 0, 0, src_reg());
             emit(MOV(writemask(dst, WRITEMASK_XY),
                      swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
             break;
          }
          case GL_TRIANGLES:
             /* DWord 4; use offset 1 but normal swizzle/writemask. */
-            emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, src_reg());
+            emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, 0,
+                                 src_reg());
             break;
          case GL_ISOLINES:
             /* All channels are undefined. */
@@ -334,10 +342,11 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          }
 
          dst_reg tmp(this, glsl_type::vec4_type);
-         emit_output_urb_read(tmp, 1, src_reg());
+         emit_output_urb_read(tmp, 1, 0, src_reg());
          emit(MOV(dst, swizzle(src_reg(tmp), swiz)));
       } else {
-         emit_output_urb_read(dst, imm_offset, indirect_offset);
+         emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr),
+                              indirect_offset);
       }
       break;
    }
index d408e56..030eb5e 100644 (file)
@@ -64,6 +64,7 @@ protected:
                             const src_reg &indirect_offset);
    void emit_output_urb_read(const dst_reg &dst,
                              unsigned base_offset,
+                             unsigned first_component,
                              const src_reg &indirect_offset);
 
    void emit_urb_write(const src_reg &value, unsigned writemask,
index 8266a9d..226dcb4 100644 (file)
@@ -179,7 +179,7 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       src_reg indirect_offset = get_indirect_offset(instr);
       dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
       unsigned imm_offset = instr->const_index[0];
-      unsigned fist_component = nir_intrinsic_component(instr);
+      unsigned first_component = nir_intrinsic_component(instr);
       src_reg header = input_read_header;
 
       if (indirect_offset.file != BAD_FILE) {
@@ -193,7 +193,7 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          const unsigned max_push_slots = 24;
          if (imm_offset < max_push_slots) {
             src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type);
-            src.swizzle = BRW_SWZ_COMP_INPUT(fist_component);
+            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
 
             emit(MOV(dst, src));
             prog_data->urb_read_length =
@@ -210,7 +210,7 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
 
       src_reg src = src_reg(temp);
-      src.swizzle = BRW_SWZ_COMP_INPUT(fist_component);
+      src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
 
       /* Copy to target.  We might end up with some funky writemasks landing
        * in here, but we really don't want them in the above pseudo-ops.