i965/gen6/gs: Implement GS_OPCODE_FF_SYNC.
author Iago Toral Quiroga <itoral@igalia.com>
Wed, 9 Jul 2014 06:46:17 +0000 (08:46 +0200)
committer Iago Toral Quiroga <itoral@igalia.com>
Fri, 19 Sep 2014 13:01:15 +0000 (15:01 +0200)
This implements the FF_SYNC message required in gen6 geometry shaders to
get the initial URB handle.

Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp

index 0ef43e9..27a513a 100644 (file)
@@ -1014,6 +1014,21 @@ enum opcode {
     * - dst is the GRF for gl_InvocationID.
     */
    GS_OPCODE_GET_INSTANCE_ID,
+
+   /**
+    * Send a FF_SYNC message to allocate initial URB handles (gen6).
+    *
+    * - dst will be used as the writeback register for the FF_SYNC operation.
+    *
+    * - src0 is the number of primitives written.
+    *
+    * Note: This opcode uses an implicit MRF register for the ff_sync message
+    * header, so the caller is expected to set inst->base_mrf and initialize
+    * that MRF register to r0. This opcode will also write to this MRF register
+    * to include the allocated URB handle so it can then be reused directly as
+    * the header in the URB write operation we are allocating the handle for.
+    */
+   GS_OPCODE_FF_SYNC,
 };
 
 enum brw_derivative_quality {
index 1a18169..4eea74e 100644 (file)
@@ -522,6 +522,8 @@ brw_instruction_name(enum opcode op)
       return "set_channel_masks";
    case GS_OPCODE_GET_INSTANCE_ID:
       return "get_instance_id";
+   case GS_OPCODE_FF_SYNC:
+      return "ff_sync";
 
    default:
       /* Yes, this leaks.  It's in debug code, it should never occur, and if
index 9102be2..fa99276 100644 (file)
@@ -276,6 +276,8 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
    case GS_OPCODE_URB_WRITE:
    case GS_OPCODE_THREAD_END:
       return 0;
+   case GS_OPCODE_FF_SYNC:
+      return 1;
    case SHADER_OPCODE_SHADER_TIME_ADD:
       return 0;
    case SHADER_OPCODE_TEX:
index 186667c..d2d2a8f 100644 (file)
@@ -656,6 +656,9 @@ private:
    void generate_gs_prepare_channel_masks(struct brw_reg dst);
    void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
    void generate_gs_get_instance_id(struct brw_reg dst);
+   void generate_gs_ff_sync(vec4_instruction *inst,
+                            struct brw_reg dst,
+                            struct brw_reg src0);
    void generate_oword_dual_block_offsets(struct brw_reg m1,
                                          struct brw_reg index);
    void generate_scratch_write(vec4_instruction *inst,
index 954ce64..c1eda12 100644 (file)
@@ -659,6 +659,47 @@ vec4_generator::generate_gs_get_instance_id(struct brw_reg dst)
 }
 
 void
+vec4_generator::generate_gs_ff_sync(vec4_instruction *inst,
+                                    struct brw_reg dst,
+                                    struct brw_reg src0)
+{
+   /* This opcode uses an implied MRF register for:
+    *  - the header of the ff_sync message. And as such it is expected to be
+    *    initialized to r0 before calling here.
+    *  - the destination where we will write the allocated URB handle.
+    */
+   struct brw_reg header =
+      retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);
+
+   /* Overwrite dword 0 of the header (cleared for now since we are not doing
+    * transform feedback) and dword 1 (to hold the number of primitives
+    * written).
+    */
+   brw_push_insn_state(p);
+   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   brw_MOV(p, get_element_ud(header, 0), brw_imm_ud(0));
+   brw_MOV(p, get_element_ud(header, 1), get_element_ud(src0, 0));
+   brw_pop_insn_state(p);
+
+   /* Allocate URB handle in dst */
+   brw_ff_sync(p,
+               dst,
+               0,
+               header,
+               1, /* allocate */
+               1, /* response length */
+               0 /* eot */);
+
+   /* Now put allocated urb handle in header.0 */
+   brw_push_insn_state(p);
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+   brw_MOV(p, get_element_ud(header, 0), get_element_ud(dst, 0));
+   brw_pop_insn_state(p);
+}
+
+void
 vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
                                                   struct brw_reg index)
 {
@@ -1280,6 +1321,10 @@ vec4_generator::generate_code(const cfg_t *cfg)
          generate_gs_set_vertex_count(dst, src[0]);
          break;
 
+      case GS_OPCODE_FF_SYNC:
+         generate_gs_ff_sync(inst, dst, src[0]);
+         break;
+
       case GS_OPCODE_SET_DWORD_2_IMMED:
          generate_gs_set_dword_2_immed(dst, src[0]);
          break;