nvc0: add support for ARB_query_buffer_object
authorIlia Mirkin <imirkin@alum.mit.edu>
Sun, 31 Jan 2016 20:18:23 +0000 (15:18 -0500)
committerIlia Mirkin <imirkin@alum.mit.edu>
Fri, 5 Feb 2016 02:21:30 +0000 (21:21 -0500)
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
13 files changed:
docs/GL3.txt
docs/relnotes/11.2.0.html
src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
src/gallium/drivers/nouveau/nvc0/nvc0_context.c
src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
src/gallium/drivers/nouveau/nvc0/nvc0_query.c
src/gallium/drivers/nouveau/nvc0/nvc0_query.h
src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c

index 7623ada..257fc73 100644 (file)
@@ -186,7 +186,7 @@ GL 4.4, GLSL 4.40:
   - specified transform/feedback layout                in progress
   - input/output block locations                       DONE
   GL_ARB_multi_bind                                    DONE (all drivers)
-  GL_ARB_query_buffer_object                           not started
+  GL_ARB_query_buffer_object                           DONE (nvc0)
   GL_ARB_texture_mirror_clamp_to_edge                  DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_texture_stencil8                              DONE (nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_vertex_type_10f_11f_11f_rev                   DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
index 404e293..c35ee9a 100644 (file)
@@ -48,6 +48,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_compute_shader on i965</li>
 <li>GL_ARB_copy_image on r600</li>
 <li>GL_ARB_indirect_parameters on nvc0</li>
+<li>GL_ARB_query_buffer_object on nvc0</li>
 <li>GL_ARB_shader_atomic_counters on nvc0</li>
 <li>GL_ARB_shader_draw_parameters on i965, nvc0</li>
 <li>GL_ARB_shader_storage_buffer_object on nvc0</li>
index 4daa57d..7f76ec6 100644 (file)
@@ -491,3 +491,52 @@ daic_runout:
 daic_runout_check:
    branz annul $r7 #daic_runout
    bra annul #daic_restore
+
+/* NVC0_3D_MACRO_QUERY_BUFFER_WRITE:
+ *
+ * This is a combination macro for all of our query buffer object needs.
+ * It has the option to clamp results to a configurable amount, as well as
+ * to write out one or two words.
+ *
+ * We use the query engine to write out the values, and expect the query
+ * address to point to the right place.
+ *
+ * arg = clamp value (0 means unclamped). clamped means just 1 written value.
+ * parm[0] = LSB of end value
+ * parm[1] = MSB of end value
+ * parm[2] = LSB of start value
+ * parm[3] = MSB of start value
+ * parm[4] = desired sequence
+ * parm[5] = actual sequence
+ */
+.section #mme9097_query_buffer_write
+   parm $r2
+   parm $r3
+   parm $r4
+   parm $r5 maddr 0x16c2 /* QUERY_SEQUENCE */
+   parm $r6
+   parm $r7
+   mov $r6 (sub $r7 $r6) /* actual - desired */
+   mov $r6 (sbb 0x0 0x0) /* if there was underflow, not reached yet */
+   braz annul $r6 #qbw_ready
+   exit
+qbw_ready:
+   mov $r2 (sub $r2 $r4)
+   braz $r1 #qbw_postclamp
+   mov $r3 (sbb $r3 $r5)
+   branz annul $r3 #qbw_clamp
+   mov $r4 (sub $r1 $r2)
+   mov $r4 (sbb 0x0 0x0)
+   braz annul $r4 #qbw_postclamp
+qbw_clamp:
+   mov $r2 $r1
+qbw_postclamp:
+   send $r2
+   mov $r4 0x1000
+   branz annul $r1 #qbw_done
+   send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
+   maddr 0x16c2 /* QUERY_SEQUENCE */
+   send $r3
+qbw_done:
+   exit send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
+   nop
index bf8625e..ecadf7e 100644 (file)
@@ -332,3 +332,36 @@ uint32_t mme9097_draw_arrays_indirect_count[] = {
        0xfffef837,
        0xfffdc027,
 };
+
+uint32_t mme9097_query_buffer_write[] = {
+       0x00000201,
+       0x00000301,
+/* 0x000a: qbw_ready */
+       0x00000401,
+       0x05b08551,
+/* 0x0011: qbw_clamp */
+/* 0x0012: qbw_postclamp */
+       0x00000601,
+       0x00000701,
+/* 0x0018: qbw_done */
+       0x0005be10,
+       0x00060610,
+       0x0000b027,
+       0x00000091,
+       0x00051210,
+       0x0001c807,
+       0x00075b10,
+       0x00011837,
+       0x00048c10,
+       0x00060410,
+       0x0000a027,
+       0x00000a11,
+       0x00001041,
+       0x04000411,
+       0x00010837,
+       0x84010042,
+       0x05b08021,
+       0x00001841,
+       0x840100c2,
+       0x00000011,
+};
index 0eb3792..547b8f5 100644 (file)
@@ -188,10 +188,9 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
                                  int ref)
 {
    struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe);
-   unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
    unsigned s, i;
 
-   if (bind & PIPE_BIND_RENDER_TARGET) {
+   if (res->bind & PIPE_BIND_RENDER_TARGET) {
       for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
          if (nvc0->framebuffer.cbufs[i] &&
              nvc0->framebuffer.cbufs[i]->texture == res) {
@@ -202,7 +201,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
          }
       }
    }
-   if (bind & PIPE_BIND_DEPTH_STENCIL) {
+   if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
       if (nvc0->framebuffer.zsbuf &&
           nvc0->framebuffer.zsbuf->texture == res) {
          nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
@@ -212,13 +211,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
       }
    }
 
-   if (bind & (PIPE_BIND_VERTEX_BUFFER |
-               PIPE_BIND_INDEX_BUFFER |
-               PIPE_BIND_CONSTANT_BUFFER |
-               PIPE_BIND_SHADER_BUFFER |
-               PIPE_BIND_STREAM_OUTPUT |
-               PIPE_BIND_COMMAND_ARGS_BUFFER |
-               PIPE_BIND_SAMPLER_VIEW)) {
+   if (res->target == PIPE_BUFFER) {
       for (i = 0; i < nvc0->num_vtxbufs; ++i) {
          if (nvc0->vtxbuf[i].buffer == res) {
             nvc0->dirty |= NVC0_NEW_ARRAYS;
index 27c026b..49e176c 100644 (file)
@@ -33,4 +33,6 @@
 
 #define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT             0x00003850
 
+#define NVC0_3D_MACRO_QUERY_BUFFER_WRITE                       0x00003858
+
 #endif /* __NVC0_MACROS_H__ */
index 7497317..d2acce7 100644 (file)
@@ -74,6 +74,24 @@ nvc0_get_query_result(struct pipe_context *pipe, struct pipe_query *pq,
 }
 
 static void
+nvc0_get_query_result_resource(struct pipe_context *pipe,
+                               struct pipe_query *pq,
+                               boolean wait,
+                               enum pipe_query_value_type result_type,
+                               int index,
+                               struct pipe_resource *resource,
+                               unsigned offset)
+{
+   struct nvc0_query *q = nvc0_query(pq);
+   if (!q->funcs->get_query_result_resource) {
+      assert(!"Unexpected lack of get_query_result_resource");
+      return;
+   }
+   q->funcs->get_query_result_resource(nvc0_context(pipe), q, wait, result_type,
+                                       index, resource, offset);
+}
+
+static void
 nvc0_render_condition(struct pipe_context *pipe,
                       struct pipe_query *pq,
                       boolean condition, uint mode)
@@ -129,7 +147,7 @@ nvc0_render_condition(struct pipe_context *pipe,
    }
 
    if (wait)
-      nvc0_hw_query_fifo_wait(push, q);
+      nvc0_hw_query_fifo_wait(nvc0, q);
 
    PUSH_SPACE(push, 7);
    PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
@@ -262,6 +280,7 @@ nvc0_init_query_functions(struct nvc0_context *nvc0)
    pipe->begin_query = nvc0_begin_query;
    pipe->end_query = nvc0_end_query;
    pipe->get_query_result = nvc0_get_query_result;
+   pipe->get_query_result_resource = nvc0_get_query_result_resource;
    pipe->render_condition = nvc0_render_condition;
    nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
 }
index c46361c..a887b22 100644 (file)
@@ -14,6 +14,13 @@ struct nvc0_query_funcs {
    void (*end_query)(struct nvc0_context *, struct nvc0_query *);
    boolean (*get_query_result)(struct nvc0_context *, struct nvc0_query *,
                                boolean, union pipe_query_result *);
+   void (*get_query_result_resource)(struct nvc0_context *nvc0,
+                                     struct nvc0_query *q,
+                                     boolean wait,
+                                     enum pipe_query_value_type result_type,
+                                     int index,
+                                     struct pipe_resource *resource,
+                                     unsigned offset);
 };
 
 struct nvc0_query {
index 1bed016..6238588 100644 (file)
@@ -358,11 +358,119 @@ nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
    return true;
 }
 
+static void
+nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
+                                  struct nvc0_query *q,
+                                  boolean wait,
+                                  enum pipe_query_value_type result_type,
+                                  int index,
+                                  struct pipe_resource *resource,
+                                  unsigned offset)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_hw_query *hq = nvc0_hw_query(q);
+   struct nv04_resource *buf = nv04_resource(resource);
+   unsigned stride;
+
+   assert(!hq->funcs || !hq->funcs->get_query_result);
+
+   if (index == -1) {
+      /* TODO: Use a macro to write the availability of the query */
+      if (hq->state != NVC0_HW_QUERY_STATE_READY)
+         nvc0_hw_query_update(nvc0->screen->base.client, q);
+      uint32_t ready[2] = {hq->state == NVC0_HW_QUERY_STATE_READY};
+      nvc0->base.push_cb(&nvc0->base, buf, offset,
+                         result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1,
+                         ready);
+      return;
+   }
+
+   /* If the fence guarding this query has not been emitted, that makes a lot
+    * of the following logic more complicated.
+    */
+   if (hq->is64bit && hq->fence->state < NOUVEAU_FENCE_STATE_EMITTED)
+      nouveau_fence_emit(hq->fence);
+
+   /* We either need to compute a 32- or 64-bit difference between 2 values,
+    * and then store the result as either a 32- or 64-bit value. As such let's
+    * treat all inputs as 64-bit (and just push an extra 0 for the 32-bit
+    * ones), and have one macro that clamps result to i32, u32, or just
+    * outputs the difference (no need to worry about 64-bit clamping).
+    */
+   if (hq->state != NVC0_HW_QUERY_STATE_READY)
+      nvc0_hw_query_update(nvc0->screen->base.client, q);
+
+   if (wait && hq->state != NVC0_HW_QUERY_STATE_READY)
+      nvc0_hw_query_fifo_wait(nvc0, q);
+
+   nouveau_pushbuf_space(push, 16, 2, 0);
+   PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+   PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
+   BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, buf->address + offset);
+   PUSH_DATA (push, buf->address + offset);
+   BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 7);
+   if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) /* XXX what if 64-bit? */
+      PUSH_DATA(push, 0x00000001);
+   else if (result_type == PIPE_QUERY_TYPE_I32)
+      PUSH_DATA(push, 0x7fffffff);
+   else if (result_type == PIPE_QUERY_TYPE_U32)
+      PUSH_DATA(push, 0xffffffff);
+   else
+      PUSH_DATA(push, 0x00000000);
+
+   switch (q->type) {
+   case PIPE_QUERY_SO_STATISTICS:
+      stride = 2;
+      break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      stride = 12;
+      break;
+   default:
+      assert(index == 0);
+      stride = 1;
+      break;
+   }
+
+   if (hq->is64bit) {
+      nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * index,
+                           8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * (index + stride),
+                           8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+   } else {
+      nouveau_pushbuf_data(push, hq->bo, hq->offset + 4,
+                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      PUSH_DATA(push, 0);
+      nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 + 4,
+                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      PUSH_DATA(push, 0);
+   }
+
+   if (wait || hq->state == NVC0_HW_QUERY_STATE_READY) {
+      PUSH_DATA(push, 0);
+      PUSH_DATA(push, 0);
+   } else if (hq->is64bit) {
+      PUSH_DATA(push, hq->fence->sequence);
+      nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0,
+                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+   } else {
+      PUSH_DATA(push, hq->sequence);
+      nouveau_pushbuf_data(push, hq->bo, hq->offset,
+                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+   }
+
+   if (buf->mm) {
+      nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+      nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+   }
+}
+
 static const struct nvc0_query_funcs hw_query_funcs = {
    .destroy_query = nvc0_hw_destroy_query,
    .begin_query = nvc0_hw_begin_query,
    .end_query = nvc0_hw_end_query,
    .get_query_result = nvc0_hw_get_query_result,
+   .get_query_result_resource = nvc0_hw_get_query_result_resource,
 };
 
 struct nvc0_query *
@@ -476,8 +584,9 @@ nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *push,
 }
 
 void
-nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q)
+nvc0_hw_query_fifo_wait(struct nvc0_context *nvc0, struct nvc0_query *q)
 {
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct nvc0_hw_query *hq = nvc0_hw_query(q);
    unsigned offset = hq->offset;
 
@@ -486,9 +595,15 @@ nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q)
    PUSH_SPACE(push, 5);
    PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
    BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
-   PUSH_DATAh(push, hq->bo->offset + offset);
-   PUSH_DATA (push, hq->bo->offset + offset);
-   PUSH_DATA (push, hq->sequence);
+   if (hq->is64bit) {
+      PUSH_DATAh(push, nvc0->screen->fence.bo->offset);
+      PUSH_DATA (push, nvc0->screen->fence.bo->offset);
+      PUSH_DATA (push, hq->fence->sequence);
+   } else {
+      PUSH_DATAh(push, hq->bo->offset + offset);
+      PUSH_DATA (push, hq->bo->offset + offset);
+      PUSH_DATA (push, hq->sequence);
+   }
    PUSH_DATA (push, (1 << 12) |
               NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
 }
index 3701eb7..8225755 100644 (file)
@@ -51,6 +51,6 @@ void
 nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *, struct nvc0_query *,
                              unsigned);
 void
-nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nvc0_query *);
+nvc0_hw_query_fifo_wait(struct nvc0_context *, struct nvc0_query *);
 
 #endif
index 17ea943..45693c8 100644 (file)
@@ -191,6 +191,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_MULTI_DRAW_INDIRECT:
    case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
    case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+   case PIPE_CAP_QUERY_BUFFER_OBJECT:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
       return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -219,7 +220,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_STRING_MARKER:
    case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
    case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
-   case PIPE_CAP_QUERY_BUFFER_OBJECT:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
@@ -683,7 +683,7 @@ nvc0_screen_create(struct nouveau_device *dev)
    screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
       PIPE_BIND_SHADER_BUFFER |
       PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
-      PIPE_BIND_COMMAND_ARGS_BUFFER;
+      PIPE_BIND_COMMAND_ARGS_BUFFER | PIPE_BIND_QUERY_BUFFER;
    screen->base.sysmem_bindings |=
       PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
 
@@ -1049,6 +1049,7 @@ nvc0_screen_create(struct nouveau_device *dev)
    MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
    MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
    MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
+   MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
 
    BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
    PUSH_DATA (push, 1);
index dc02b01..382a18e 100644 (file)
@@ -316,7 +316,7 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
          continue;
 
       if (!targ->clean)
-         nvc0_hw_query_fifo_wait(push, nvc0_query(targ->pq));
+         nvc0_hw_query_fifo_wait(nvc0, nvc0_query(targ->pq));
       nouveau_pushbuf_space(push, 0, 0, 1);
       BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
       PUSH_DATA (push, 1);
index e8ee486..65ecc7f 100644 (file)
@@ -789,7 +789,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
       res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
       PUSH_SPACE(push, 2);
       IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
-      nvc0_hw_query_fifo_wait(push, nvc0_query(so->pq));
+      nvc0_hw_query_fifo_wait(nvc0, nvc0_query(so->pq));
       if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
          IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);