nv50: fake enough resume support pre-nva0 to pass gles3 requirements
author Ilia Mirkin <imirkin@alum.mit.edu>
Wed, 6 Jan 2021 05:48:23 +0000 (00:48 -0500)
committer Ilia Mirkin <imirkin@alum.mit.edu>
Fri, 8 Jan 2021 18:43:12 +0000 (13:43 -0500)
GLES3 supports pause/resume of xfb. However since there's no geometry
shader support in ES3, it's a lot easier to figure out the offsets to
use. This makes it work for the dEQP tests at least.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8346>

src/gallium/drivers/nouveau/nv50/nv50_context.h
src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
src/gallium/drivers/nouveau/nv50/nv50_state.c
src/gallium/drivers/nouveau/nv50/nv50_vbo.c

index 57b8599..eda5a6f 100644 (file)
@@ -161,6 +161,11 @@ struct nv50_context {
    uint8_t num_so_targets;
    uint8_t so_targets_dirty;
    struct pipe_stream_output_target *so_target[4];
+   /* keeps track of how much of an SO is used. normally this doesn't work in
+    * the presence of GS, but this only needs to work for ES 3.0 which doesn't
+    * have GS or any other oddities. only used pre-NVA0.
+    */
+   uint32_t so_used[4];
 
    struct pipe_framebuffer_state framebuffer;
    struct pipe_blend_color blend_colour;
index ae32a13..6a910d7 100644 (file)
@@ -696,11 +696,17 @@ nv50_stream_output_validate(struct nv50_context *nv50)
 
       const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3;
 
-      if (n == 4 && !targ->clean)
-         nv84_hw_query_fifo_wait(push, nv50_query(targ->pq));
+      uint32_t so_used = 0;
+
+      if (!targ->clean) {
+         if (n == 4)
+            nv84_hw_query_fifo_wait(push, nv50_query(targ->pq));
+         else
+            so_used = nv50->so_used[i];
+      }
       BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
-      PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
-      PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
+      PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset + so_used);
+      PUSH_DATA (push, buf->address + targ->pipe.buffer_offset + so_used);
       PUSH_DATA (push, so->num_attribs[i]);
       if (n == 4) {
          PUSH_DATA(push, targ->pipe.buffer_size);
@@ -714,9 +720,10 @@ nv50_stream_output_validate(struct nv50_context *nv50)
             targ->clean = false;
          }
       } else {
-         const unsigned limit = targ->pipe.buffer_size /
+         const unsigned limit = (targ->pipe.buffer_size - so_used) /
             (so->stride[i] * nv50->state.prim_size);
          prims = MIN2(prims, limit);
+         targ->clean = false;
       }
       targ->stride = so->stride[i];
       BCTX_REFN(nv50->bufctx_3d, 3D_SO, buf, WR);
index adb6945..7329930 100644 (file)
@@ -1207,8 +1207,10 @@ nv50_set_stream_output_targets(struct pipe_context *pipe,
          serialize = false;
       }
 
-      if (targets[i] && !append)
+      if (targets[i] && !append) {
          nv50_so_target(targets[i])->clean = true;
+         nv50->so_used[i] = 0;
+      }
 
       pipe_so_target_reference(&nv50->so_target[i], targets[i]);
    }
index 26539b3..74030db 100644 (file)
@@ -23,6 +23,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
 #include "util/u_inlines.h"
+#include "util/u_prim.h"
 #include "util/format/u_format.h"
 #include "translate/translate.h"
 
@@ -845,6 +846,18 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
       PUSH_DATA (push, 0x00010000 * !!nv50->state.mul_zero_wins);
    }
 
+   /* Make starting/pausing streamout work pre-NVA0 enough for ES3.0. This
+    * means counting vertices in a vertex shader when it has so outputs.
+    */
+   if (nv50->screen->base.class_3d < NVA0_3D_CLASS &&
+       nv50->vertprog->pipe.stream_output.num_outputs) {
+      for (int i = 0; i < nv50->num_so_targets; i++) {
+         nv50->so_used[i] += info->instance_count *
+            u_stream_outputs_for_vertices(info->mode, draws[0].count) *
+            nv50->vertprog->pipe.stream_output.stride[i] * 4;
+      }
+   }
+
    if (nv50->vbo_fifo) {
       nv50_push_vbo(nv50, info, indirect, &draws[0]);
       goto cleanup;