From 2fed18b8a54319f22888e3761a6ed5cd85f9688c Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 31 Jan 2016 15:18:23 -0500 Subject: [PATCH] nvc0: add support for ARB_query_buffer_object Signed-off-by: Ilia Mirkin --- docs/GL3.txt | 2 +- docs/relnotes/11.2.0.html | 1 + src/gallium/drivers/nouveau/nvc0/mme/com9097.mme | 49 ++++++++ src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h | 33 ++++++ src/gallium/drivers/nouveau/nvc0/nvc0_context.c | 13 +-- src/gallium/drivers/nouveau/nvc0/nvc0_macros.h | 2 + src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 21 +++- src/gallium/drivers/nouveau/nvc0/nvc0_query.h | 7 ++ src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c | 123 ++++++++++++++++++++- src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 5 +- .../drivers/nouveau/nvc0/nvc0_shader_state.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 2 +- 13 files changed, 241 insertions(+), 21 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 7623ada..257fc73 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -186,7 +186,7 @@ GL 4.4, GLSL 4.40: - specified transform/feedback layout in progress - input/output block locations DONE GL_ARB_multi_bind DONE (all drivers) - GL_ARB_query_buffer_object not started + GL_ARB_query_buffer_object DONE (nvc0) GL_ARB_texture_mirror_clamp_to_edge DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_texture_stencil8 DONE (nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_vertex_type_10f_11f_11f_rev DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html index 404e293..c35ee9a 100644 --- a/docs/relnotes/11.2.0.html +++ b/docs/relnotes/11.2.0.html @@ -48,6 +48,7 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_compute_shader on i965
  • GL_ARB_copy_image on r600
  • GL_ARB_indirect_parameters on nvc0
  • +
  • GL_ARB_query_buffer_object on nvc0
  • GL_ARB_shader_atomic_counters on nvc0
  • GL_ARB_shader_draw_parameters on i965, nvc0
  • GL_ARB_shader_storage_buffer_object on nvc0
  • diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme index 4daa57d..7f76ec6 100644 --- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme +++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme @@ -491,3 +491,52 @@ daic_runout: daic_runout_check: branz annul $r7 #daic_runout bra annul #daic_restore + +/* NVC0_3D_MACRO_QUERY_BUFFER_WRITE: + * + * This is a combination macro for all of our query buffer object needs. + * It has the option to clamp results to a configurable amount, as well as + * to write out one or two words. + * + * We use the query engine to write out the values, and expect the query + * address to point to the right place. + * + * arg = clamp value (0 means unclamped). clamped means just 1 written value. + * parm[0] = LSB of end value + * parm[1] = MSB of end value + * parm[2] = LSB of start value + * parm[3] = MSB of start value + * parm[4] = desired sequence + * parm[5] = actual sequence + */ +.section #mme9097_query_buffer_write + parm $r2 + parm $r3 + parm $r4 + parm $r5 maddr 0x16c2 /* QUERY_SEQUENCE */ + parm $r6 + parm $r7 + mov $r6 (sub $r7 $r6) /* actual - desired */ + mov $r6 (sbb 0x0 0x0) /* if there was underflow, not reached yet */ + braz annul $r6 #qbw_ready + exit +qbw_ready: + mov $r2 (sub $r2 $r4) + braz $r1 #qbw_postclamp + mov $r3 (sbb $r3 $r5) + branz annul $r3 #qbw_clamp + mov $r4 (sub $r1 $r2) + mov $r4 (sbb 0x0 0x0) + braz annul $r4 #qbw_postclamp +qbw_clamp: + mov $r2 $r1 +qbw_postclamp: + send $r2 + mov $r4 0x1000 + branz annul $r1 #qbw_done + send (extrinsrt 0x0 $r4 0x0 0x10 0x10) + maddr 0x16c2 /* QUERY_SEQUENCE */ + send $r3 +qbw_done: + exit send (extrinsrt 0x0 $r4 0x0 0x10 0x10) + nop diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h index bf8625e..ecadf7e 100644 --- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h +++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h @@ -332,3 +332,36 @@ uint32_t mme9097_draw_arrays_indirect_count[] = { 0xfffef837, 0xfffdc027, }; + +uint32_t mme9097_query_buffer_write[] = { + 0x00000201, + 0x00000301, +/* 0x000a: qbw_ready */ + 0x00000401, + 0x05b08551, +/* 0x0011: qbw_clamp */ +/* 0x0012: qbw_postclamp */ + 0x00000601, + 0x00000701, +/* 0x0018: qbw_done */ + 0x0005be10, + 0x00060610, + 0x0000b027, + 0x00000091, + 0x00051210, + 0x0001c807, + 0x00075b10, + 0x00011837, + 0x00048c10, + 0x00060410, + 0x0000a027, + 0x00000a11, + 0x00001041, + 0x04000411, + 0x00010837, + 0x84010042, + 0x05b08021, + 0x00001841, + 0x840100c2, + 0x00000011, +}; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 0eb3792..547b8f5 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -188,10 +188,9 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, int ref) { struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe); - unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER; unsigned s, i; - if (bind & PIPE_BIND_RENDER_TARGET) { + if (res->bind & PIPE_BIND_RENDER_TARGET) { for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) { if (nvc0->framebuffer.cbufs[i] && nvc0->framebuffer.cbufs[i]->texture == res) { @@ -202,7 +201,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, } } } - if (bind & PIPE_BIND_DEPTH_STENCIL) { + if (res->bind & PIPE_BIND_DEPTH_STENCIL) { if (nvc0->framebuffer.zsbuf && nvc0->framebuffer.zsbuf->texture == res) { nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; @@ -212,13 +211,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, } } - if (bind & (PIPE_BIND_VERTEX_BUFFER | - PIPE_BIND_INDEX_BUFFER | - PIPE_BIND_CONSTANT_BUFFER | - PIPE_BIND_SHADER_BUFFER | - PIPE_BIND_STREAM_OUTPUT | - PIPE_BIND_COMMAND_ARGS_BUFFER | - PIPE_BIND_SAMPLER_VIEW)) { + if (res->target == PIPE_BUFFER) { for (i = 0; i < nvc0->num_vtxbufs; ++i) { if (nvc0->vtxbuf[i].buffer == res) { nvc0->dirty |= NVC0_NEW_ARRAYS; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h index 27c026b..49e176c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h @@ -33,4 +33,6 @@ #define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT 0x00003850 +#define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858 + #endif /* __NVC0_MACROS_H__ */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index 7497317..d2acce7 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -74,6 +74,24 @@ nvc0_get_query_result(struct pipe_context *pipe, struct pipe_query *pq, } static void +nvc0_get_query_result_resource(struct pipe_context *pipe, + struct pipe_query *pq, + boolean wait, + enum pipe_query_value_type result_type, + int index, + struct pipe_resource *resource, + unsigned offset) +{ + struct nvc0_query *q = nvc0_query(pq); + if (!q->funcs->get_query_result_resource) { + assert(!"Unexpected lack of get_query_result_resource"); + return; + } + q->funcs->get_query_result_resource(nvc0_context(pipe), q, wait, result_type, + index, resource, offset); +} + +static void nvc0_render_condition(struct pipe_context *pipe, struct pipe_query *pq, boolean condition, uint mode) @@ -129,7 +147,7 @@ nvc0_render_condition(struct pipe_context *pipe, } if (wait) - nvc0_hw_query_fifo_wait(push, q); + nvc0_hw_query_fifo_wait(nvc0, q); PUSH_SPACE(push, 7); PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); @@ -262,6 +280,7 @@ nvc0_init_query_functions(struct nvc0_context *nvc0) pipe->begin_query = nvc0_begin_query; pipe->end_query = nvc0_end_query; pipe->get_query_result = nvc0_get_query_result; + pipe->get_query_result_resource = nvc0_get_query_result_resource; pipe->render_condition = nvc0_render_condition; nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h index c46361c..a887b22 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h @@ -14,6 +14,13 @@ struct nvc0_query_funcs { void (*end_query)(struct nvc0_context *, struct nvc0_query *); boolean (*get_query_result)(struct nvc0_context *, struct nvc0_query *, boolean, union pipe_query_result *); + void (*get_query_result_resource)(struct nvc0_context *nvc0, + struct nvc0_query *q, + boolean wait, + enum pipe_query_value_type result_type, + int index, + struct pipe_resource *resource, + unsigned offset); }; struct nvc0_query { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c index 1bed016..6238588 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c @@ -358,11 +358,119 @@ nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q, return true; } +static void +nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0, + struct nvc0_query *q, + boolean wait, + enum pipe_query_value_type result_type, + int index, + struct pipe_resource *resource, + unsigned offset) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_hw_query *hq = nvc0_hw_query(q); + struct nv04_resource *buf = nv04_resource(resource); + unsigned stride; + + assert(!hq->funcs || !hq->funcs->get_query_result); + + if (index == -1) { + /* TODO: Use a macro to write the availability of the query */ + if (hq->state != NVC0_HW_QUERY_STATE_READY) + nvc0_hw_query_update(nvc0->screen->base.client, q); + uint32_t ready[2] = {hq->state == NVC0_HW_QUERY_STATE_READY}; + nvc0->base.push_cb(&nvc0->base, buf, offset, + result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1, + ready); + return; + } + + /* If the fence guarding this query has not been emitted, that makes a lot + * of the following logic more complicated. + */ + if (hq->is64bit && hq->fence->state < NOUVEAU_FENCE_STATE_EMITTED) + nouveau_fence_emit(hq->fence); + + /* We either need to compute a 32- or 64-bit difference between 2 values, + * and then store the result as either a 32- or 64-bit value. As such let's + * treat all inputs as 64-bit (and just push an extra 0 for the 32-bit + * ones), and have one macro that clamps result to i32, u32, or just + * outputs the difference (no need to worry about 64-bit clamping). + */ + if (hq->state != NVC0_HW_QUERY_STATE_READY) + nvc0_hw_query_update(nvc0->screen->base.client, q); + + if (wait && hq->state != NVC0_HW_QUERY_STATE_READY) + nvc0_hw_query_fifo_wait(nvc0, q); + + nouveau_pushbuf_space(push, 16, 2, 0); + PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR); + BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 2); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); + BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 7); + if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) /* XXX what if 64-bit? */ + PUSH_DATA(push, 0x00000001); + else if (result_type == PIPE_QUERY_TYPE_I32) + PUSH_DATA(push, 0x7fffffff); + else if (result_type == PIPE_QUERY_TYPE_U32) + PUSH_DATA(push, 0xffffffff); + else + PUSH_DATA(push, 0x00000000); + + switch (q->type) { + case PIPE_QUERY_SO_STATISTICS: + stride = 2; + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + stride = 12; + break; + default: + assert(index == 0); + stride = 1; + break; + } + + if (hq->is64bit) { + nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * index, + 8 | NVC0_IB_ENTRY_1_NO_PREFETCH); + nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * (index + stride), + 8 | NVC0_IB_ENTRY_1_NO_PREFETCH); + } else { + nouveau_pushbuf_data(push, hq->bo, hq->offset + 4, + 4 | NVC0_IB_ENTRY_1_NO_PREFETCH); + PUSH_DATA(push, 0); + nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 + 4, + 4 | NVC0_IB_ENTRY_1_NO_PREFETCH); + PUSH_DATA(push, 0); + } + + if (wait || hq->state == NVC0_HW_QUERY_STATE_READY) { + PUSH_DATA(push, 0); + PUSH_DATA(push, 0); + } else if (hq->is64bit) { + PUSH_DATA(push, hq->fence->sequence); + nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0, + 4 | NVC0_IB_ENTRY_1_NO_PREFETCH); + } else { + PUSH_DATA(push, hq->sequence); + nouveau_pushbuf_data(push, hq->bo, hq->offset, + 4 | NVC0_IB_ENTRY_1_NO_PREFETCH); + } + + if (buf->mm) { + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence); + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr); + } +} + static const struct nvc0_query_funcs hw_query_funcs = { .destroy_query = nvc0_hw_destroy_query, .begin_query = nvc0_hw_begin_query, .end_query = nvc0_hw_end_query, .get_query_result = nvc0_hw_get_query_result, + .get_query_result_resource = nvc0_hw_get_query_result_resource, }; struct nvc0_query * @@ -476,8 +584,9 @@ nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, } void -nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q) +nvc0_hw_query_fifo_wait(struct nvc0_context *nvc0, struct nvc0_query *q) { + struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_hw_query *hq = nvc0_hw_query(q); unsigned offset = hq->offset; @@ -486,9 +595,15 @@ nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q) PUSH_SPACE(push, 5); PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); - PUSH_DATAh(push, hq->bo->offset + offset); - PUSH_DATA (push, hq->bo->offset + offset); - PUSH_DATA (push, hq->sequence); + if (hq->is64bit) { + PUSH_DATAh(push, nvc0->screen->fence.bo->offset); + PUSH_DATA (push, nvc0->screen->fence.bo->offset); + PUSH_DATA (push, hq->fence->sequence); + } else { + PUSH_DATAh(push, hq->bo->offset + offset); + PUSH_DATA (push, hq->bo->offset + offset); + PUSH_DATA (push, hq->sequence); + } PUSH_DATA (push, (1 << 12) | NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h index 3701eb7..8225755 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h @@ -51,6 +51,6 @@ void nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *, struct nvc0_query *, unsigned); void -nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nvc0_query *); +nvc0_hw_query_fifo_wait(struct nvc0_context *, struct nvc0_query *); #endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 17ea943..45693c8 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -191,6 +191,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MULTI_DRAW_INDIRECT: case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_QUERY_BUFFER_OBJECT: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; @@ -219,7 +220,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_STRING_MARKER: case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: - case PIPE_CAP_QUERY_BUFFER_OBJECT: return 0; case PIPE_CAP_VENDOR_ID: @@ -683,7 +683,7 @@ nvc0_screen_create(struct nouveau_device *dev) screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_SHADER_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER | - PIPE_BIND_COMMAND_ARGS_BUFFER; + PIPE_BIND_COMMAND_ARGS_BUFFER | PIPE_BIND_QUERY_BUFFER; screen->base.sysmem_bindings |= PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER; @@ -1049,6 +1049,7 @@ nvc0_screen_create(struct nouveau_device *dev) MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect); MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count); MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count); + MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write); BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1); PUSH_DATA (push, 1); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index dc02b01..382a18e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -316,7 +316,7 @@ nvc0_tfb_validate(struct nvc0_context *nvc0) continue; if (!targ->clean) - nvc0_hw_query_fifo_wait(push, nvc0_query(targ->pq)); + nvc0_hw_query_fifo_wait(nvc0, nvc0_query(targ->pq)); nouveau_pushbuf_space(push, 0, 0, 1); BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5); PUSH_DATA (push, 1); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index e8ee486..65ecc7f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -789,7 +789,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0, res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; PUSH_SPACE(push, 2); IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); - nvc0_hw_query_fifo_wait(push, nvc0_query(so->pq)); + nvc0_hw_query_fifo_wait(nvc0, nvc0_query(so->pq)); if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS) IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0); -- 2.7.4