nvc0: handle more query types

author Christoph Bumiller <e0425955@student.tuwien.ac.at>

Fri, 21 Oct 2011 20:58:18 +0000 (22:58 +0200)

committer Christoph Bumiller <e0425955@student.tuwien.ac.at>

Fri, 21 Oct 2011 21:00:40 +0000 (23:00 +0200)
author Christoph Bumiller <e0425955@student.tuwien.ac.at>
Fri, 21 Oct 2011 20:58:18 +0000 (22:58 +0200)
committer Christoph Bumiller <e0425955@student.tuwien.ac.at>
Fri, 21 Oct 2011 21:00:40 +0000 (23:00 +0200)
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h

index c4e481c..c739b1b 100644 (file)
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -168,6 +168,10 @@ void nvc0_program_library_upload(struct nvc0_context *);
  
  /* nvc0_query.c */
  void nvc0_init_query_functions(struct nvc0_context *);
+void nvc0_query_pushbuf_submit(struct nvc0_context *nvc0,
+                               struct pipe_query *pq, unsigned result_offset);
+
+#define NVC0_QUERY_TFB_BUFFER_OFFSETS (PIPE_QUERY_TYPES + 0)
  
  /* nvc0_shader_state.c */
  void nvc0_vertprog_validate(struct nvc0_context *);
diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c

index ead015b..be363a2 100644 (file)
--- a/src/gallium/drivers/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nvc0/nvc0_query.c
@@ -25,26 +25,22 @@
  #include "nvc0_context.h"
  #include "nouveau/nv_object.xml.h"
  
-/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
- * (since we use only a single GPU channel per screen) will not work properly.
- *
- * The first is not that big of an issue because OpenGL does not allow nested
- * queries anyway.
- */
-
  struct nvc0_query {
     uint32_t *data;
     uint32_t type;
     uint32_t sequence;
     struct nouveau_bo *bo;
     uint32_t base;
-   uint32_t offset; /* base + i * 16 */
+   uint32_t offset; /* base + i * rotate */
     boolean ready;
+   boolean active;
     boolean is64bit;
+   uint8_t rotate;
+   int nesting; /* only used for occlusion queries */
     struct nouveau_mm_allocation *mm;
  };
  
-#define NVC0_QUERY_ALLOC_SPACE 128
+#define NVC0_QUERY_ALLOC_SPACE 256
  
  static INLINE struct nvc0_query *
  nvc0_query(struct pipe_query *pipe)
@@ -64,7 +60,8 @@ nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
           if (q->ready)
              nouveau_mm_free(q->mm);
           else
-            nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, q->mm);
+            nouveau_fence_work(screen->base.fence.current,
+                               nouveau_mm_free_work, q->mm);
        }
     }
     if (size) {
@@ -97,24 +94,53 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type)
  {
     struct nvc0_context *nvc0 = nvc0_context(pipe);
     struct nvc0_query *q;
+   unsigned space = NVC0_QUERY_ALLOC_SPACE;
  
     q = CALLOC_STRUCT(nvc0_query);
     if (!q)
        return NULL;
  
-   if (!nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE)) {
+   switch (type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      q->rotate = 32;
+      space = NVC0_QUERY_ALLOC_SPACE;
+      break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      q->is64bit = TRUE;
+      space = 512;
+      break;
+   case PIPE_QUERY_SO_STATISTICS:
+      q->is64bit = TRUE;
+      space = 64;
+      break;
+   case PIPE_QUERY_TIME_ELAPSED:
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+   case PIPE_QUERY_GPU_FINISHED:
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      space = 32;
+      break;
+   case NVC0_QUERY_TFB_BUFFER_OFFSETS:
+      space = 16;
+      break;
+   default:
+      FREE(q);
+      return NULL;
+   }
+   if (!nvc0_query_allocate(nvc0, q, space)) {
        FREE(q);
        return NULL;
     }
  
-   q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
-                 type == PIPE_QUERY_PRIMITIVES_EMITTED ||
-                 type == PIPE_QUERY_SO_STATISTICS);
     q->type = type;
  
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
-      q->offset -= 16;
-      q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */
+   if (q->rotate) {
+      /* we advance before query_begin ! */
+      q->offset -= q->rotate;
+      q->data -= q->rotate / sizeof(*q->data);
     }
  
     return (struct pipe_query *)q;
@@ -135,56 +161,83 @@ nvc0_query_get(struct nouveau_channel *chan, struct nvc0_query *q,
  }
  
  static void
+nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+   q->offset += q->rotate;
+   q->data += q->rotate / sizeof(*q->data);
+   if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
+      nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
+}
+
+static void
  nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
  {
     struct nvc0_context *nvc0 = nvc0_context(pipe);
     struct nouveau_channel *chan = nvc0->screen->base.channel;
     struct nvc0_query *q = nvc0_query(pq);
  
+   const int index = 0; /* vertex stream */
+
     /* For occlusion queries we have to change the storage, because a previous
      * query might set the initial render conition to FALSE even *after* we re-
      * initialized it to TRUE.
      */
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
-      q->offset += 16;
-      q->data += 16 / sizeof(*q->data);
-      if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
-         nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
+   if (q->rotate) {
+      nvc0_query_rotate(nvc0, q);
  
        /* XXX: can we do this with the GPU, and sync with respect to a previous
         *  query ?
         */
        q->data[1] = 1; /* initial render condition = TRUE */
+      q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
+      q->data[5] = 0;
     }
     if (!q->is64bit)
        q->data[0] = q->sequence++; /* the previously used one */
  
     switch (q->type) {
     case PIPE_QUERY_OCCLUSION_COUNTER:
-      IMMED_RING(chan, RING_3D(COUNTER_RESET), NVC0_3D_COUNTER_RESET_SAMPLECNT);
-      IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      q->nesting = nvc0->screen->num_occlusion_queries_active++;
+      if (q->nesting) {
+         nvc0_query_get(chan, q, 0x10, 0x0100f002);
+      } else {
+         BEGIN_RING(chan, RING_3D(COUNTER_RESET), 1);
+         OUT_RING  (chan, NVC0_3D_COUNTER_RESET_SAMPLECNT);
+         IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
+      }
        break;
-   case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
-      IMMED_RING(chan, RING_3D(COUNTER_RESET),
-                 NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+      nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
        break;
     case PIPE_QUERY_PRIMITIVES_EMITTED:
-      IMMED_RING(chan, RING_3D(COUNTER_RESET),
-                 NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
+      nvc0_query_get(chan, q, 0x10, 0x05805002 | (index << 5));
        break;
     case PIPE_QUERY_SO_STATISTICS:
-      BEGIN_RING_NI(chan, RING_3D(COUNTER_RESET), 2);
-      OUT_RING  (chan, NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES);
-      OUT_RING  (chan, NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+      nvc0_query_get(chan, q, 0x20, 0x05805002 | (index << 5));
+      nvc0_query_get(chan, q, 0x30, 0x06805002 | (index << 5));
        break;
     case PIPE_QUERY_TIMESTAMP_DISJOINT:
     case PIPE_QUERY_TIME_ELAPSED:
        nvc0_query_get(chan, q, 0x10, 0x00005002);
        break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      nvc0_query_get(chan, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
+      nvc0_query_get(chan, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
+      nvc0_query_get(chan, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
+      nvc0_query_get(chan, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
+      nvc0_query_get(chan, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
+      nvc0_query_get(chan, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
+      nvc0_query_get(chan, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
+      nvc0_query_get(chan, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
+      nvc0_query_get(chan, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
+      nvc0_query_get(chan, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
+      break;
     default:
        break;
     }
     q->ready = FALSE;
+   q->active = TRUE;
  }
  
  static void
@@ -196,22 +249,38 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
  
     const int index = 0; /* for multiple vertex streams */
  
+   if (!q->active) {
+      /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
+      if (q->rotate)
+         nvc0_query_rotate(nvc0, q);
+      else
+      if (!q->is64bit)
+         q->data[0] = q->sequence++;
+   }
+   q->ready = FALSE;
+   q->active = FALSE;
+
     switch (q->type) {
     case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
        nvc0_query_get(chan, q, 0, 0x0100f002);
-      BEGIN_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
-      OUT_RING  (chan, 0);
+      if (--nvc0->screen->num_occlusion_queries_active == 0)
+         IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 0);
        break;
     case PIPE_QUERY_PRIMITIVES_GENERATED:
-      nvc0_query_get(chan, q, 0, 0x09005002 | (index << 5));
+      nvc0_query_get(chan, q, 0, 0x06805002 | (index << 5));
        break;
     case PIPE_QUERY_PRIMITIVES_EMITTED:
        nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5));
        break;
     case PIPE_QUERY_SO_STATISTICS:
        nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5));
-      nvc0_query_get(chan, q, 0x10, 0x09005002 | (index << 5));
+      nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
+      break;
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      nvc0_query_get(chan, q, 0x00, 0x02005002 | (index << 5));
        break;
+   case PIPE_QUERY_TIMESTAMP:
     case PIPE_QUERY_TIMESTAMP_DISJOINT:
     case PIPE_QUERY_TIME_ELAPSED:
        nvc0_query_get(chan, q, 0, 0x00005002);
@@ -219,6 +288,24 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
     case PIPE_QUERY_GPU_FINISHED:
        nvc0_query_get(chan, q, 0, 0x1000f010);
        break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      nvc0_query_get(chan, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
+      nvc0_query_get(chan, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
+      nvc0_query_get(chan, q, 0x20, 0x02802002); /* VP, LAUNCHES */
+      nvc0_query_get(chan, q, 0x30, 0x03806002); /* GP, LAUNCHES */
+      nvc0_query_get(chan, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
+      nvc0_query_get(chan, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
+      nvc0_query_get(chan, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
+      nvc0_query_get(chan, q, 0x70, 0x0980a002); /* ROP, PIXELS */
+      nvc0_query_get(chan, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
+      nvc0_query_get(chan, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
+      break;
+   case NVC0_QUERY_TFB_BUFFER_OFFSETS:
+      nvc0_query_get(chan, q, 0x00, 0x1d005002); /* TFB, BUFFER_OFFSET */
+      nvc0_query_get(chan, q, 0x04, 0x1d005022);
+      nvc0_query_get(chan, q, 0x08, 0x1d005042);
+      nvc0_query_get(chan, q, 0x0c, 0x1d005062);
+      break;
     default:
        assert(0);
        break;
@@ -250,11 +337,7 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
     uint32_t *res32 = result;
     boolean *res8 = result;
     uint64_t *data64 = (uint64_t *)q->data;
-
-   if (q->type == PIPE_QUERY_GPU_FINISHED) {
-      res8[0] = nvc0_query_ready(q);
-      return TRUE;
-   }
+   unsigned i;
  
     if (!q->ready) /* update ? */
        q->ready = nvc0_query_ready(q);
@@ -271,24 +354,50 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
     q->ready = TRUE;
  
     switch (q->type) {
+   case PIPE_QUERY_GPU_FINISHED:
+      res32[0] = 0;
+      res8[0] = TRUE;
+      break;
     case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
-      res32[0] = q->data[1];
+      res64[0] = q->data[1] - q->data[5];
+      break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      res32[0] = 0;
+      res8[0] = q->data[1] != q->data[5];
        break;
     case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
     case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
-      res64[0] = data64[0];
+      res64[0] = data64[0] - data64[2];
        break;
     case PIPE_QUERY_SO_STATISTICS:
-      res64[0] = data64[0];
-      res64[1] = data64[1];
+      res64[0] = data64[0] - data64[4];
+      res64[1] = data64[2] - data64[6];
+      break;
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      res32[0] = 0;
+      res8[0] = !q->data[1];
+      break;
+   case PIPE_QUERY_TIMESTAMP:
+      res64[0] = data64[1];
        break;
     case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
        res64[0] = 1000000000;
-      res8[8] = (data64[0] == data64[2]) ? FALSE : TRUE;
+      res32[2] = 0;
+      res8[8] = (data64[1] == data64[3]) ? FALSE : TRUE;
        break;
     case PIPE_QUERY_TIME_ELAPSED:
        res64[0] = data64[1] - data64[3];
        break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      for (i = 0; i < 10; ++i)
+         res64[i] = data64[i * 2] - data64[24 + i * 2];
+      break;
+   case NVC0_QUERY_TFB_BUFFER_OFFSETS:
+      res32[0] = q->data[0];
+      res32[1] = q->data[1];
+      res32[2] = q->data[2];
+      res32[3] = q->data[3];
+      break;
     default:
        return FALSE;
     }
@@ -303,6 +412,11 @@ nvc0_render_condition(struct pipe_context *pipe,
     struct nvc0_context *nvc0 = nvc0_context(pipe);
     struct nouveau_channel *chan = nvc0->screen->base.channel;
     struct nvc0_query *q;
+   uint32_t cond;
+   boolean negated = FALSE;
+   boolean wait =
+      mode != PIPE_RENDER_COND_NO_WAIT &&
+      mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
  
     if (!pq) {
        IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
@@ -310,8 +424,33 @@ nvc0_render_condition(struct pipe_context *pipe,
     }
     q = nvc0_query(pq);
  
-   if (mode == PIPE_RENDER_COND_WAIT ||
-       mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
+   /* NOTE: comparison of 2 queries only works if both have completed */
+   switch (q->type) {
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      /* query writes 1 if there was no overflow */
+      cond = negated ? NVC0_3D_COND_MODE_RES_NON_ZERO :
+                       NVC0_3D_COND_MODE_EQUAL;
+      wait = TRUE;
+      break;
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      if (likely(!negated)) {
+         if (unlikely(q->nesting))
+            cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
+                          NVC0_3D_COND_MODE_ALWAYS;
+         else
+            cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
+      } else {
+         cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
+      }
+      break;
+   default:
+      assert(!"render condition query not a predicate");
+      mode = NVC0_3D_COND_MODE_ALWAYS;
+      break;
+   }
+
+   if (wait) {
        MARK_RING (chan, 5, 2);
        BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
        OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
@@ -324,7 +463,17 @@ nvc0_render_condition(struct pipe_context *pipe,
     BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
     OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
     OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
-   OUT_RING  (chan, NVC0_3D_COND_MODE_RES_NON_ZERO);
+   OUT_RING  (chan, cond);
+}
+
+void
+nvc0_query_pushbuf_submit(struct nvc0_context *nvc0,
+                          struct pipe_query *pq, unsigned result_offset)
+{
+   struct nvc0_query *q = nvc0_query(pq);
+
+   nouveau_pushbuf_submit(nvc0->screen->base.channel,
+                          q->bo, q->offset + result_offset, 4);
  }
  
  void
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h

index cad9173..98a94c6 100644 (file)
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -27,6 +27,8 @@ struct nvc0_screen {
  
     struct nvc0_context *cur_ctx;
  
+   int num_occlusion_queries_active;
+
     struct nouveau_bo *text;
     struct nouveau_bo *uniforms;
     struct nouveau_bo *tls;
author	Christoph Bumiller <e0425955@student.tuwien.ac.at>
	Fri, 21 Oct 2011 20:58:18 +0000 (22:58 +0200)
committer	Christoph Bumiller <e0425955@student.tuwien.ac.at>
	Fri, 21 Oct 2011 21:00:40 +0000 (23:00 +0200)
src/gallium/drivers/nvc0/nvc0_context.h		patch \| blob \| history
src/gallium/drivers/nvc0/nvc0_query.c		patch \| blob \| history
src/gallium/drivers/nvc0/nvc0_screen.h		patch \| blob \| history