radeonsi: fix streamout overflow predication on VI+

author Nicolai Hähnle <nicolai.haehnle@amd.com>

Thu, 27 Jul 2017 17:24:59 +0000 (19:24 +0200)

committer Nicolai Hähnle <nicolai.haehnle@amd.com>

Wed, 2 Aug 2017 07:48:53 +0000 (09:48 +0200)
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Thu, 27 Jul 2017 17:24:59 +0000 (19:24 +0200)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Wed, 2 Aug 2017 07:48:53 +0000 (09:48 +0200)
diff --git a/src/amd/common/r600d_common.h b/src/amd/common/r600d_common.h

index 3374475..0b61215 100644 (file)
--- a/src/amd/common/r600d_common.h
+++ b/src/amd/common/r600d_common.h
@@ -117,6 +117,7 @@
  #define PREDICATION_OP_CLEAR 0x0
  #define PREDICATION_OP_ZPASS 0x1
  #define PREDICATION_OP_PRIMCOUNT 0x2
+#define PREDICATION_OP_BOOL64 0x3
  #define PRED_OP(x) ((x) << 16)
  #define PREDICATION_CONTINUE (1 << 31)
  #define PREDICATION_HINT_WAIT (0 << 12)
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c

index 62ab745..5879a07 100644 (file)
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -505,6 +505,7 @@ void r600_query_hw_destroy(struct r600_common_screen *rscreen,
         }
  
         r600_resource_reference(&query->buffer.buf, NULL);
+       r600_resource_reference(&query->workaround_buf, NULL);
         FREE(rquery);
  }
  
@@ -946,19 +947,23 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
         flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
                     ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
  
-       switch (query->b.type) {
-       case PIPE_QUERY_OCCLUSION_COUNTER:
-       case PIPE_QUERY_OCCLUSION_PREDICATE:
-               op = PRED_OP(PREDICATION_OP_ZPASS);
-               break;
-       case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-       case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
-               op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
-               invert = !invert;
-               break;
-       default:
-               assert(0);
-               return;
+       if (query->workaround_buf) {
+               op = PRED_OP(PREDICATION_OP_BOOL64);
+       } else {
+               switch (query->b.type) {
+               case PIPE_QUERY_OCCLUSION_COUNTER:
+               case PIPE_QUERY_OCCLUSION_PREDICATE:
+                       op = PRED_OP(PREDICATION_OP_ZPASS);
+                       break;
+               case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+               case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+                       op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
+                       invert = !invert;
+                       break;
+               default:
+                       assert(0);
+                       return;
+               }
         }
  
         /* if true then invert, see GL_ARB_conditional_render_inverted */
@@ -967,8 +972,21 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
         else
                 op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible or no overflow */
  
+       /* Use the value written by compute shader as a workaround. Note that
+        * the wait flag does not apply in this predication mode.
+        *
+        * The shader outputs the result value to L2. Workarounds only affect VI
+        * and later, where the CP reads data from L2, so we don't need an
+        * additional flush.
+        */
+       if (query->workaround_buf) {
+               uint64_t va = query->workaround_buf->gpu_address + query->workaround_offset;
+               emit_set_predicate(ctx, query->workaround_buf, va, op);
+               return;
+       }
+
         op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
-       
+
         /* emit predicate packets for all data blocks */
         for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
                 unsigned results_base = 0;
@@ -1064,6 +1082,8 @@ bool r600_query_hw_begin(struct r600_common_context *rctx,
         if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES))
                 r600_query_hw_reset_buffers(rctx, query);
  
+       r600_resource_reference(&query->workaround_buf, NULL);
+
         r600_query_hw_emit_start(rctx, query);
         if (!query->buffer.buf)
                 return false;
@@ -1777,11 +1797,43 @@ static void r600_render_condition(struct pipe_context *ctx,
         /* Compute the size of SET_PREDICATION packets. */
         atom->num_dw = 0;
         if (query) {
-               for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
-                       atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
+               bool needs_workaround = false;
+
+               /* There is a firmware regression in VI which causes successive
+                * SET_PREDICATION packets to give the wrong answer for
+                * non-inverted stream overflow predication.
+                */
+               if (rctx->chip_class >= VI && !condition &&
+                   (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
+                    (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE &&
+                     (rquery->buffer.previous ||
+                      rquery->buffer.results_end > rquery->result_size)))) {
+                       needs_workaround = true;
+               }
  
-               if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
-                       atom->num_dw *= R600_MAX_STREAMS;
+               if (needs_workaround && !rquery->workaround_buf) {
+                       bool old_force_off = rctx->render_cond_force_off;
+                       rctx->render_cond_force_off = true;
+
+                       u_suballocator_alloc(
+                               rctx->allocator_zeroed_memory, 8, 8,
+                               &rquery->workaround_offset,
+                               (struct pipe_resource **)&rquery->workaround_buf);
+
+                       ctx->get_query_result_resource(
+                               ctx, query, true, PIPE_QUERY_TYPE_U64, 0,
+                               &rquery->workaround_buf->b.b, rquery->workaround_offset);
+
+                       atom->num_dw = 5;
+
+                       rctx->render_cond_force_off = old_force_off;
+               } else {
+                       for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
+                               atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
+
+                       if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
+                               atom->num_dw *= R600_MAX_STREAMS;
+               }
         }
  
         rctx->set_atom_dirty(rctx, atom, query != NULL);
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h

index e176246..5565d70 100644 (file)
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -194,6 +194,10 @@ struct r600_query_hw {
         struct list_head list;
         /* For transform feedback: which stream the query is for */
         unsigned stream;
+
+       /* Workaround via compute shader */
+       struct r600_resource *workaround_buf;
+       unsigned workaround_offset;
  };
  
  bool r600_query_hw_init(struct r600_common_screen *rscreen,
author	Nicolai Hähnle <nicolai.haehnle@amd.com>
	Thu, 27 Jul 2017 17:24:59 +0000 (19:24 +0200)
committer	Nicolai Hähnle <nicolai.haehnle@amd.com>
	Wed, 2 Aug 2017 07:48:53 +0000 (09:48 +0200)
src/amd/common/r600d_common.h		patch | blob | history
src/gallium/drivers/radeon/r600_query.c		patch | blob | history
src/gallium/drivers/radeon/r600_query.h		patch | blob | history