i965: Fix GS hang on Sandybridge
author: Zhenyu Wang <zhenyuw@linux.intel.com>
Thu, 14 Oct 2010 02:54:53 +0000 (10:54 +0800)
committer: Zhenyu Wang <zhenyuw@linux.intel.com>
Thu, 14 Oct 2010 03:24:42 +0000 (11:24 +0800)
Don't use r0 as the FF_SYNC destination register on Sandybridge: doing so
would smash the FFID field in the GS payload, which causes later URB writes
to fail. Also, don't use r0 in any URB write that requires an allocate.

src/mesa/drivers/dri/i965/brw_eu_emit.c
src/mesa/drivers/dri/i965/brw_gs.c
src/mesa/drivers/dri/i965/brw_gs.h
src/mesa/drivers/dri/i965/brw_gs_emit.c

index f9aa5f7..4128638 100644 (file)
@@ -1848,7 +1848,8 @@ void brw_ff_sync(struct brw_compile *p,
    if (intel->gen >= 6) {
       brw_push_insn_state(p);
       brw_set_mask_control( p, BRW_MASK_DISABLE );
-      brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
+      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
+             retype(src0, BRW_REGISTER_TYPE_UD));
       brw_pop_insn_state(p);
       src0 = brw_message_reg(msg_reg_nr);
    }
index ad178d8..cfcc8ea 100644 (file)
@@ -96,8 +96,6 @@ static void compile_gs_prog( struct brw_context *brw,
       brw_gs_quad_strip( &c, key );
       break;
    case GL_LINE_LOOP:
-      /* XXX fix GS hang issue */
-      assert(intel->gen < 6);
       brw_gs_lines( &c );
       break;
    case GL_LINES:
index 813b8d4..7e35310 100644 (file)
@@ -56,6 +56,7 @@ struct brw_gs_compile {
    struct {
       struct brw_reg R0;
       struct brw_reg vertex[MAX_GS_VERTS];
+      struct brw_reg temp;
    } reg;
 
    /* 3 different ways of expressing vertex size:
index a01d557..e1f751f 100644 (file)
@@ -58,6 +58,8 @@ static void brw_gs_alloc_regs( struct brw_gs_compile *c,
       i += c->nr_regs;
    }
 
+   c->reg.temp = brw_vec8_grf(i, 0);
+
    c->prog_data.urb_read_length = c->nr_regs; 
    c->prog_data.total_grf = i;
 }
@@ -69,12 +71,22 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c,
                            GLuint header)
 {
    struct brw_compile *p = &c->func;
+   struct intel_context *intel = &c->func.brw->intel;
    GLboolean allocate = !last;
+   struct brw_reg temp;
+
+   if (intel->gen < 6)
+       temp = c->reg.R0;
+   else {
+       temp = c->reg.temp;
+       brw_MOV(p, retype(temp, BRW_REGISTER_TYPE_UD),
+              retype(c->reg.R0, BRW_REGISTER_TYPE_UD));
+   }
 
    /* Overwrite PrimType and PrimStart in the message header, for
     * each vertex in turn:
     */
-   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+   brw_MOV(p, get_element_ud(temp, 2), brw_imm_ud(header));
 
    /* Copy the vertex from vertn into m1..mN+1:
     */
@@ -87,9 +99,9 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c,
     * allocated each time.
     */
    brw_urb_WRITE(p, 
-                allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+                allocate ? temp : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 0,
-                c->reg.R0,
+                temp,
                 allocate,
                 1,             /* used */
                 c->nr_regs + 1, /* msg length */
@@ -98,19 +110,39 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c,
                 1,             /* writes_complete */
                 0,             /* urb offset */
                 BRW_URB_SWIZZLE_NONE);
+
+   if (intel->gen >= 6 && allocate)
+       brw_MOV(p, get_element_ud(c->reg.R0, 0), get_element_ud(temp, 0));
 }
 
 static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
 {
        struct brw_compile *p = &c->func;
-       brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim));
-       brw_ff_sync(p,
-                   c->reg.R0,
-                   0,
-                   c->reg.R0,
-                   1, /* allocate */
-                   1, /* response length */
-                   0 /* eot */);
+       struct intel_context *intel = &c->func.brw->intel;
+
+       if (intel->gen < 6) {
+           brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim));
+           brw_ff_sync(p,
+                       c->reg.R0,
+                       0,
+                       c->reg.R0,
+                       1, /* allocate */
+                       1, /* response length */
+                       0 /* eot */);
+       } else {
+           brw_MOV(p, retype(c->reg.temp, BRW_REGISTER_TYPE_UD),
+                   retype(c->reg.R0, BRW_REGISTER_TYPE_UD));
+           brw_MOV(p, get_element_ud(c->reg.temp, 1), brw_imm_ud(num_prim));
+           brw_ff_sync(p,
+                       c->reg.temp,
+                       0,
+                       c->reg.temp,
+                       1, /* allocate */
+                       1, /* response length */
+                       0 /* eot */);
+           brw_MOV(p, get_element_ud(c->reg.R0, 0),
+                   get_element_ud(c->reg.temp, 0));
+       }
 }