From e6caafd9d7fbfcb5906d22be9d6a3c1714e078ac Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 16 May 2012 21:08:37 +0200 Subject: [PATCH] nv50,nvc0: handle user vertex buffers And restructure VBO validation a little in the process. --- src/gallium/drivers/nouveau/nouveau_buffer.c | 20 +- src/gallium/drivers/nouveau/nouveau_buffer.h | 9 +- src/gallium/drivers/nv50/nv50_context.h | 3 + src/gallium/drivers/nv50/nv50_push.c | 20 +- src/gallium/drivers/nv50/nv50_screen.c | 2 +- src/gallium/drivers/nv50/nv50_state.c | 15 +- src/gallium/drivers/nv50/nv50_stateobj.h | 3 +- src/gallium/drivers/nv50/nv50_vbo.c | 242 +++++++++-------- src/gallium/drivers/nvc0/nvc0_context.h | 2 + src/gallium/drivers/nvc0/nvc0_screen.c | 2 +- src/gallium/drivers/nvc0/nvc0_state.c | 25 +- src/gallium/drivers/nvc0/nvc0_stateobj.h | 4 +- src/gallium/drivers/nvc0/nvc0_vbo.c | 377 ++++++++++++++++---------- src/gallium/drivers/nvc0/nvc0_vbo_translate.c | 7 +- 14 files changed, 423 insertions(+), 308 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c index 936e2bf..c396e3c 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.c +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -604,30 +604,28 @@ nouveau_scratch_more(struct nouveau_context *nv, unsigned min_size) return ret; } -/* Upload data to scratch memory and update buffer address. - * Returns the bo the data resides in, if successful. - */ -struct nouveau_bo * + +/* Copy data to a scratch buffer and return address & bo the data resides in. */ +uint64_t nouveau_scratch_data(struct nouveau_context *nv, - struct nv04_resource *buf, unsigned base, unsigned size) + const void *data, unsigned base, unsigned size, + struct nouveau_bo **bo) { - struct nouveau_bo *bo; unsigned bgn = MAX2(base, nv->scratch.offset); unsigned end = bgn + size; if (end >= nv->scratch.end) { end = base + size; if (!nouveau_scratch_more(nv, end)) - return NULL; + return 0; bgn = base; } nv->scratch.offset = align(end, 4); - memcpy(nv->scratch.map + bgn, buf->data + base, size); + memcpy(nv->scratch.map + bgn, (const uint8_t *)data + base, size); - bo = nv->scratch.current; - buf->address = bo->offset + (bgn - base); - return bo; + *bo = nv->scratch.current; + return (*bo)->offset + (bgn - base); } void * diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h b/src/gallium/drivers/nouveau/nouveau_buffer.h index 19255a3..3b8ee72 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.h +++ b/src/gallium/drivers/nouveau/nouveau_buffer.h @@ -89,11 +89,12 @@ boolean nouveau_user_buffer_upload(struct nouveau_context *, struct nv04_resource *, unsigned base, unsigned size); -/* Copy data to a scratch buffer, update buffer address. - * Returns the bo the data resides in, or NULL on failure. +/* Copy data to a scratch buffer and return address & bo the data resides in. + * Returns 0 on failure. */ -struct nouveau_bo * +uint64_t nouveau_scratch_data(struct nouveau_context *, - struct nv04_resource *, unsigned base, unsigned size); + const void *data, unsigned base, unsigned size, + struct nouveau_bo **); #endif diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 8f5363b..5da0473 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -123,8 +123,11 @@ struct nv50_context { struct pipe_index_buffer idxbuf; uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */ uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ + uint32_t vbo_constant; /* bitmask of user buffers with stride 0 */ uint32_t vb_elt_first; /* from pipe_draw_info, for vertex upload */ uint32_t vb_elt_limit; /* max - min element (count - 1) */ + uint32_t instance_off; /* base vertex for instanced arrays */ + uint32_t instance_max; /* max instance for current draw call */ struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS]; unsigned num_textures[3]; diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c index 1f7bc76..eb6bfbc 100644 --- a/src/gallium/drivers/nv50/nv50_push.c +++ b/src/gallium/drivers/nv50/nv50_push.c @@ -220,15 +220,17 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) ctx.vertex_words = nv50->vertex->vertex_size; for (i = 0; i < nv50->num_vtxbufs; ++i) { - uint8_t *data; - struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; - struct nv04_resource *res = nv04_resource(vb->buffer); + const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; + const uint8_t *data; - data = nouveau_resource_map_offset(&nv50->base, res, - vb->buffer_offset, NOUVEAU_BO_RD); + if (unlikely(vb->buffer)) + data = nouveau_resource_map_offset(&nv50->base, + nv04_resource(vb->buffer), vb->buffer_offset, NOUVEAU_BO_RD); + else + data = vb->user_buffer; if (apply_bias && likely(!(nv50->vertex->instance_bufs & (1 << i)))) - data += info->index_bias * vb->stride; + data += (ptrdiff_t)info->index_bias * vb->stride; ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); } @@ -304,10 +306,4 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) ctx.instance_id++; ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } - - if (info->indexed) - nouveau_resource_unmap(nv04_resource(nv50->idxbuf.buffer)); - - for (i = 0; i < nv50->num_vtxbufs; ++i) - nouveau_resource_unmap(nv04_resource(nv50->vtxbuf[i].buffer)); } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 1874f3e..d7efa35 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -151,10 +151,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS: case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: - case PIPE_CAP_USER_VERTEX_BUFFERS: return 0; /* state trackers will know better */ case PIPE_CAP_USER_CONSTANT_BUFFERS: case PIPE_CAP_USER_INDEX_BUFFERS: + case PIPE_CAP_USER_VERTEX_BUFFERS: return 1; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 1e7d17a..81c3fa2 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -889,12 +889,23 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, struct nv50_context *nv50 = nv50_context(pipe); unsigned i; - for (i = 0; i < count; ++i) + nv50->vbo_user = nv50->vbo_constant = 0; + + for (i = 0; i < count; ++i) { + nv50->vtxbuf[i].stride = vb[i].stride; pipe_resource_reference(&nv50->vtxbuf[i].buffer, vb[i].buffer); + if (!vb[i].buffer && vb[i].user_buffer) { + nv50->vtxbuf[i].user_buffer = vb[i].user_buffer; + nv50->vbo_user |= 1 << i; + if (!vb[i].stride) + nv50->vbo_constant |= 1 << i; + } else { + nv50->vtxbuf[i].buffer_offset = vb[i].buffer_offset; + } + } for (; i < nv50->num_vtxbufs; ++i) pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL); - memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count); nv50->num_vtxbufs = count; nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX); diff --git a/src/gallium/drivers/nv50/nv50_stateobj.h b/src/gallium/drivers/nv50/nv50_stateobj.h index eec1465..f75608c 100644 --- a/src/gallium/drivers/nv50/nv50_stateobj.h +++ b/src/gallium/drivers/nv50/nv50_stateobj.h @@ -50,11 +50,12 @@ struct nv50_vertex_element { }; struct nv50_vertex_stateobj { + uint32_t min_instance_div[PIPE_MAX_ATTRIBS]; + uint16_t vb_access_size[PIPE_MAX_ATTRIBS]; struct translate *translate; unsigned num_elements; uint32_t instance_elts; uint32_t instance_bufs; - uint16_t vb_access_size[PIPE_MAX_ATTRIBS]; boolean need_conversion; unsigned vertex_size; unsigned packet_vertex_limit; diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index d21d699..4cecdea 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -62,6 +62,9 @@ nv50_vertex_state_create(struct pipe_context *pipe, memset(so->vb_access_size, 0, sizeof(so->vb_access_size)); + for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) + so->min_instance_div[i] = 0xffffffff; + transkey.nr_elements = 0; transkey.output_stride = 0; @@ -109,6 +112,8 @@ nv50_vertex_state_create(struct pipe_context *pipe, if (unlikely(ve->instance_divisor)) { so->instance_elts |= 1 << i; so->instance_bufs |= 1 << vbi; + if (ve->instance_divisor < so->min_instance_div[vbi]) + so->min_instance_div[vbi] = ve->instance_divisor; } } } @@ -130,14 +135,12 @@ static void nv50_emit_vtxattr(struct nv50_context *nv50, struct pipe_vertex_buffer *vb, struct pipe_vertex_element *ve, unsigned attr) { - const void *data; struct nouveau_pushbuf *push = nv50->base.pushbuf; - struct nv04_resource *res = nv04_resource(vb->buffer); + const void *data = (const uint8_t *)vb->user_buffer + ve->src_offset; float v[4]; const unsigned nc = util_format_get_nr_components(ve->src_format); - data = nouveau_resource_map_offset(&nv50->base, res, vb->buffer_offset + - ve->src_offset, NOUVEAU_BO_RD); + assert(vb->user_buffer); util_format_read_4f(ve->src_format, v, 0, data, 0, 0, 0, 1, 1); @@ -175,8 +178,8 @@ nv50_emit_vtxattr(struct nv50_context *nv50, struct pipe_vertex_buffer *vb, } static INLINE void -nv50_vbuf_range(struct nv50_context *nv50, int vbi, - uint32_t *base, uint32_t *size) +nv50_user_vbuf_range(struct nv50_context *nv50, int vbi, + uint32_t *base, uint32_t *size) { if (unlikely(nv50->vertex->instance_bufs & (1 << vbi))) { /* TODO: use min and max instance divisor to get a proper range */ @@ -192,66 +195,43 @@ nv50_vbuf_range(struct nv50_context *nv50, int vbi, } static void -nv50_prevalidate_vbufs(struct nv50_context *nv50, unsigned limits[]) +nv50_upload_user_buffers(struct nv50_context *nv50, + uint64_t addrs[], uint32_t limits[]) { - const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART; - struct nouveau_bo *bo; - struct pipe_vertex_buffer *vb; - struct nv04_resource *buf; - int i; - uint32_t base, size; + unsigned b; - nv50->vbo_fifo = nv50->vbo_user = 0; + for (b = 0; b < nv50->num_vtxbufs; ++b) { + struct nouveau_bo *bo; + const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b]; + uint32_t base, size; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX); - - for (i = 0; i < nv50->num_vtxbufs; ++i) { - vb = &nv50->vtxbuf[i]; - limits[i] = 0; - if (!vb->stride) + if (!(nv50->vbo_user & (1 << b)) || !vb->stride) continue; - buf = nv04_resource(vb->buffer); - - if (nouveau_resource_mapped_by_gpu(vb->buffer)) { - BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD); - } else { - if (nv50->vbo_push_hint) { - nv50->vbo_fifo = ~0; - return; - } - nv50->base.vbo_dirty = TRUE; - - if (buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) { - assert(vb->stride > vb->buffer_offset); - nv50->vbo_user |= 1 << i; - nv50_vbuf_range(nv50, i, &base, &size); - limits[i] = base + size - 1; - bo = nouveau_scratch_data(&nv50->base, buf, base, size); - if (bo) - BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, bo_flags, bo); - } else { - if (nouveau_buffer_migrate(&nv50->base, buf, NOUVEAU_BO_GART)) - BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD); - } - } + nv50_user_vbuf_range(nv50, b, &base, &size); + + limits[b] = base + size - 1; + addrs[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer, base, size, + &bo); + if (addrs[b]) + BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, NOUVEAU_BO_GART | + NOUVEAU_BO_RD, bo); } + nv50->base.vbo_dirty = TRUE; } static void nv50_update_user_vbufs(struct nv50_context *nv50) { - const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART; - struct nouveau_bo *bo; + uint64_t address[PIPE_MAX_ATTRIBS]; struct nouveau_pushbuf *push = nv50->base.pushbuf; - uint32_t base, offset, size; - int i; + unsigned i; uint32_t written = 0; for (i = 0; i < nv50->vertex->num_elements; ++i) { struct pipe_vertex_element *ve = &nv50->vertex->element[i].pipe; - const int b = ve->vertex_buffer_index; + const unsigned b = ve->vertex_buffer_index; struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b]; - struct nv04_resource *buf = nv04_resource(vb->buffer); + uint32_t base, size; if (!(nv50->vbo_user & (1 << b))) continue; @@ -260,22 +240,24 @@ nv50_update_user_vbufs(struct nv50_context *nv50) nv50_emit_vtxattr(nv50, vb, ve, i); continue; } - nv50_vbuf_range(nv50, b, &base, &size); + nv50_user_vbuf_range(nv50, b, &base, &size); if (!(written & (1 << b))) { + struct nouveau_bo *bo; + const uint32_t bo_flags = NOUVEAU_BO_GART | NOUVEAU_BO_RD; written |= 1 << b; - bo = nouveau_scratch_data(&nv50->base, buf, base, size); - if (bo) + address[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer, + base, size, &bo); + if (address[b]) BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, bo_flags, bo); } - offset = vb->buffer_offset + ve->src_offset; BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); - PUSH_DATAh(push, buf->address + base + size - 1); - PUSH_DATA (push, buf->address + base + size - 1); + PUSH_DATAh(push, address[b] + base + size - 1); + PUSH_DATA (push, address[b] + base + size - 1); BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_START_HIGH(i)), 2); - PUSH_DATAh(push, buf->address + offset); - PUSH_DATA (push, buf->address + offset); + PUSH_DATAh(push, address[b] + ve->src_offset); + PUSH_DATA (push, address[b] + ve->src_offset); } nv50->base.vbo_dirty = TRUE; } @@ -292,90 +274,112 @@ nv50_release_user_vbufs(struct nv50_context *nv50) void nv50_vertex_arrays_validate(struct nv50_context *nv50) { + uint64_t addrs[PIPE_MAX_ATTRIBS]; + uint32_t limits[PIPE_MAX_ATTRIBS]; struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_vertex_stateobj *vertex = nv50->vertex; struct pipe_vertex_buffer *vb; struct nv50_vertex_element *ve; + uint32_t mask; + uint32_t refd = 0; unsigned i; - unsigned limits[PIPE_MAX_ATTRIBS]; /* user vertex buffer limits */ + const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts); - if (unlikely(vertex->need_conversion)) { + if (unlikely(vertex->need_conversion)) nv50->vbo_fifo = ~0; - nv50->vbo_user = 0; - } else { - nv50_prevalidate_vbufs(nv50, limits); + else + if (nv50->vbo_user & ~nv50->vbo_constant) + nv50->vbo_fifo = nv50->vbo_push_hint ? ~0 : 0; + else + nv50->vbo_fifo = 0; + + /* update vertex format state */ + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_ATTRIB(0)), n); + if (nv50->vbo_fifo) { + nv50->state.num_vtxelts = vertex->num_elements; + for (i = 0; i < vertex->num_elements; ++i) + PUSH_DATA (push, vertex->element[i].state); + for (; i < n; ++i) + PUSH_DATA (push, NV50_3D_VERTEX_ATTRIB_INACTIVE); + for (i = 0; i < n; ++i) { + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1); + PUSH_DATA (push, 0); + } + return; } - - BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_ATTRIB(0)), vertex->num_elements); for (i = 0; i < vertex->num_elements; ++i) { + const unsigned b = vertex->element[i].pipe.vertex_buffer_index; ve = &vertex->element[i]; - vb = &nv50->vtxbuf[ve->pipe.vertex_buffer_index]; + vb = &nv50->vtxbuf[b]; - if (likely(vb->stride) || nv50->vbo_fifo) { + if (likely(vb->stride) || !(nv50->vbo_user & (1 << b))) PUSH_DATA(push, ve->state); - } else { + else PUSH_DATA(push, ve->state | NV50_3D_VERTEX_ARRAY_ATTRIB_CONST); - nv50->vbo_fifo &= ~(1 << i); - } } + for (; i < n; ++i) + PUSH_DATA(push, NV50_3D_VERTEX_ATTRIB_INACTIVE); + + /* update per-instance enables */ + mask = vertex->instance_elts ^ nv50->state.instance_elts; + while (mask) { + const int i = ffs(mask) - 1; + mask &= ~(1 << i); + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1); + PUSH_DATA (push, (vertex->instance_elts >> i) & 1); + } + nv50->state.instance_elts = vertex->instance_elts; + + if (nv50->vbo_user & ~nv50->vbo_constant) + nv50_upload_user_buffers(nv50, addrs, limits); + /* update buffers and set constant attributes */ for (i = 0; i < vertex->num_elements; ++i) { - struct nv04_resource *res; - unsigned limit, offset; - + uint64_t address, limit; + const unsigned b = vertex->element[i].pipe.vertex_buffer_index; ve = &vertex->element[i]; - vb = &nv50->vtxbuf[ve->pipe.vertex_buffer_index]; + vb = &nv50->vtxbuf[b]; - if (unlikely(ve->pipe.instance_divisor)) { - if (!(nv50->state.instance_elts & (1 << i))) { - BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1); - PUSH_DATA (push, 1); - } - BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_DIVISOR(i)), 1); - PUSH_DATA (push, ve->pipe.instance_divisor); - } else - if (unlikely(nv50->state.instance_elts & (1 << i))) { - BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1); - PUSH_DATA (push, 0); - } - - res = nv04_resource(vb->buffer); - - if (nv50->vbo_fifo || unlikely(vb->stride == 0)) { - if (!nv50->vbo_fifo) - nv50_emit_vtxattr(nv50, vb, &ve->pipe, i); + if (unlikely(nv50->vbo_constant & (1 << b))) { BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1); PUSH_DATA (push, 0); + nv50_emit_vtxattr(nv50, vb, &ve->pipe, i); continue; + } else + if (nv50->vbo_user & (1 << b)) { + address = addrs[b] + ve->pipe.src_offset; + limit = addrs[b] + limits[b]; + } else { + struct nv04_resource *buf = nv04_resource(vb->buffer); + if (!(refd & (1 << b))) { + refd |= 1 << b; + BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD); + } + address = buf->address + vb->buffer_offset + ve->pipe.src_offset; + limit = buf->address + buf->base.width0 - 1; } - offset = ve->pipe.src_offset + vb->buffer_offset; - limit = limits[ve->pipe.vertex_buffer_index]; - if (!limit) - limit = vb->buffer->width0 - 1; - - BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1); - PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + if (unlikely(ve->pipe.instance_divisor)) { + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 4); + PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); + PUSH_DATA (push, ve->pipe.instance_divisor); + } else { + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 3); + PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); + } BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); - PUSH_DATAh(push, res->address + limit); - PUSH_DATA (push, res->address + limit); - BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_START_HIGH(i)), 2); - PUSH_DATAh(push, res->address + offset); - PUSH_DATA (push, res->address + offset); + PUSH_DATAh(push, limit); + PUSH_DATA (push, limit); } for (; i < nv50->state.num_vtxelts; ++i) { - BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_ATTRIB(i)), 1); - PUSH_DATA (push, NV50_3D_VERTEX_ATTRIB_INACTIVE); - if (unlikely(nv50->state.instance_elts & (1 << i))) { - BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1); - PUSH_DATA (push, 0); - } BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1); PUSH_DATA (push, 0); } - nv50->state.num_vtxelts = vertex->num_elements; - nv50->state.instance_elts = vertex->instance_elts; } #define NV50_PRIM_GL_CASE(n) \ @@ -703,6 +707,8 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */ nv50->vb_elt_first = info->min_index + info->index_bias; nv50->vb_elt_limit = info->max_index - info->min_index; + nv50->instance_off = info->start_instance; + nv50->instance_max = info->instance_count - 1; /* For picking only a few vertices from a large user buffer, push is better, * if index count is larger and we expect repeated vertices, suggest upload. @@ -710,11 +716,13 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nv50->vbo_push_hint = /* the 64 is heuristic */ !(info->indexed && ((nv50->vb_elt_limit + 64) < info->count)); - if (nv50->vbo_push_hint != !!nv50->vbo_fifo) - nv50->dirty |= NV50_NEW_ARRAYS; - - if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_VERTEX | NV50_NEW_ARRAYS))) - nv50_update_user_vbufs(nv50); + if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_ARRAYS | NV50_NEW_VERTEX))) { + if (!!nv50->vbo_fifo != nv50->vbo_push_hint) + nv50->dirty |= NV50_NEW_ARRAYS; + else + if (!nv50->vbo_fifo) + nv50_update_user_vbufs(nv50); + } if (unlikely(nv50->num_so_targets && !nv50->gmtyprog)) nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode]; diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 261cfb1..ef79222 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -131,6 +131,8 @@ struct nvc0_context { uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ uint32_t vb_elt_first; /* from pipe_draw_info, for vertex upload */ uint32_t vb_elt_limit; /* max - min element (count - 1) */ + uint32_t instance_off; /* current base vertex for instanced arrays */ + uint32_t instance_max; /* last instance for current draw call */ struct pipe_sampler_view *textures[5][PIPE_MAX_SAMPLERS]; unsigned num_textures[5]; diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 3698e71..0717ac8 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -139,10 +139,10 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS: case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: - case PIPE_CAP_USER_VERTEX_BUFFERS: return 0; /* state trackers will know better */ case PIPE_CAP_USER_CONSTANT_BUFFERS: case PIPE_CAP_USER_INDEX_BUFFERS: + case PIPE_CAP_USER_VERTEX_BUFFERS: return 1; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index 5eee9d4..01bd5ce 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -756,13 +756,20 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe, uint32_t constant_vbos = 0; unsigned i; + nvc0->vbo_user = 0; + if (count != nvc0->num_vtxbufs) { for (i = 0; i < count; ++i) { pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer); - nvc0->vtxbuf[i].buffer_offset = vb[i].buffer_offset; + if (vb[i].user_buffer) { + nvc0->vbo_user |= 1 << i; + nvc0->vtxbuf[i].user_buffer = vb[i].user_buffer; + if (!vb[i].stride) + constant_vbos |= 1 << i; + } else { + nvc0->vtxbuf[i].buffer_offset = vb[i].buffer_offset; + } nvc0->vtxbuf[i].stride = vb[i].stride; - if (!vb[i].stride) - constant_vbos |= 1 << i; } for (; i < nvc0->num_vtxbufs; ++i) pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL); @@ -771,6 +778,13 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe, nvc0->dirty |= NVC0_NEW_ARRAYS; } else { for (i = 0; i < count; ++i) { + if (vb[i].user_buffer) { + nvc0->vtxbuf[i].user_buffer = vb[i].user_buffer; + nvc0->vbo_user |= 1 << i; + if (!vb[i].stride) + constant_vbos |= 1 << i; + assert(!vb[i].buffer); + } if (nvc0->vtxbuf[i].buffer == vb[i].buffer && nvc0->vtxbuf[i].buffer_offset == vb[i].buffer_offset && nvc0->vtxbuf[i].stride == vb[i].stride) @@ -778,10 +792,7 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe, pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer); nvc0->vtxbuf[i].buffer_offset = vb[i].buffer_offset; nvc0->vtxbuf[i].stride = vb[i].stride; - if (likely(vb[i].stride)) - nvc0->dirty |= NVC0_NEW_ARRAYS; - else - constant_vbos |= 1 << i; + nvc0->dirty |= NVC0_NEW_ARRAYS; } } if (constant_vbos != nvc0->constant_vbos) { diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h index 5afbffb..edab60b 100644 --- a/src/gallium/drivers/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -49,11 +49,13 @@ struct nvc0_vertex_element { }; struct nvc0_vertex_stateobj { + uint32_t min_instance_div[PIPE_MAX_ATTRIBS]; + uint16_t vb_access_size[PIPE_MAX_ATTRIBS]; struct translate *translate; unsigned num_elements; uint32_t instance_elts; uint32_t instance_bufs; - uint16_t vb_access_size[PIPE_MAX_ATTRIBS]; + boolean shared_slots; boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */ unsigned size; /* size of vertex in bytes (when packed) */ struct nvc0_vertex_element element[0]; diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index c1c9050..61a6341 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -52,6 +52,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, struct nvc0_vertex_stateobj *so; struct translate_key transkey; unsigned i; + unsigned src_offset_max = 0; so = MALLOC(sizeof(*so) + num_elements * sizeof(struct nvc0_vertex_element)); @@ -60,10 +61,14 @@ nvc0_vertex_state_create(struct pipe_context *pipe, so->num_elements = num_elements; so->instance_elts = 0; so->instance_bufs = 0; + so->shared_slots = FALSE; so->need_conversion = FALSE; memset(so->vb_access_size, 0, sizeof(so->vb_access_size)); + for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) + so->min_instance_div[i] = 0xffffffff; + transkey.nr_elements = 0; transkey.output_stride = 0; @@ -91,12 +96,16 @@ nvc0_vertex_state_create(struct pipe_context *pipe, } size = util_format_get_blocksize(fmt); + src_offset_max = MAX2(src_offset_max, ve->src_offset); + if (so->vb_access_size[vbi] < (ve->src_offset + size)) so->vb_access_size[vbi] = ve->src_offset + size; if (unlikely(ve->instance_divisor)) { so->instance_elts |= 1 << i; so->instance_bufs |= 1 << vbi; + if (ve->instance_divisor < so->min_instance_div[vbi]) + so->min_instance_div[vbi] = ve->instance_divisor; } if (1) { @@ -129,6 +138,17 @@ nvc0_vertex_state_create(struct pipe_context *pipe, so->size = transkey.output_stride; so->translate = translate_create(&transkey); + if (so->instance_elts || src_offset_max >= (1 << 14)) + return so; + so->shared_slots = TRUE; + + for (i = 0; i < num_elements; ++i) { + const unsigned b = elements[i].vertex_buffer_index; + const unsigned s = elements[i].src_offset; + so->element[i].state &= ~NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK; + so->element[i].state |= b << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT; + so->element[i].state |= s << NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT; + } return so; } @@ -143,54 +163,47 @@ nvc0_vertex_state_create(struct pipe_context *pipe, ((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT)) static void -nvc0_update_constant_vertex_attribs(struct nvc0_context *nvc0) +nvc0_set_constant_vertex_attrib(struct nvc0_context *nvc0, const unsigned a) { - uint32_t mask = nvc0->state.constant_elts; - - while (unlikely(mask)) { - const int i = ffs(mask) - 1; - uint32_t mode; - struct nouveau_pushbuf *push = nvc0->base.pushbuf; - struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe; - struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index]; - const struct util_format_description *desc; - void *dst; - const void *src = nouveau_resource_map_offset(&nvc0->base, - nv04_resource(vb->buffer), - vb->buffer_offset + ve->src_offset, NOUVEAU_BO_RD); - - mask &= ~(1 << i); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct pipe_vertex_element *ve = &nvc0->vertex->element[a].pipe; + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index]; + uint32_t mode; + const struct util_format_description *desc; + void *dst; + const void *src = (const uint8_t *)vb->user_buffer + ve->src_offset; + assert(!vb->buffer); - desc = util_format_description(ve->src_format); + desc = util_format_description(ve->src_format); - PUSH_SPACE(push, 6); - BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 5); - dst = push->cur + 1; - if (desc->channel[0].pure_integer) { - if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { - mode = VTX_ATTR(i, 4, SINT, 32); - desc->unpack_rgba_sint(dst, 0, src, 0, 1, 1); - } else { - mode = VTX_ATTR(i, 4, UINT, 32); - desc->unpack_rgba_uint(dst, 0, src, 0, 1, 1); - } + PUSH_SPACE(push, 6); + BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 5); + dst = &push->cur[1]; + if (desc->channel[0].pure_integer) { + if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { + mode = VTX_ATTR(a, 4, SINT, 32); + desc->unpack_rgba_sint(dst, 0, src, 0, 1, 1); } else { - mode = VTX_ATTR(i, 4, FLOAT, 32); - desc->unpack_rgba_float(dst, 0, src, 0, 1, 1); + mode = VTX_ATTR(a, 4, UINT, 32); + desc->unpack_rgba_uint(dst, 0, src, 0, 1, 1); } - *push->cur = mode; - push->cur += 5; + } else { + mode = VTX_ATTR(a, 4, FLOAT, 32); + desc->unpack_rgba_float(dst, 0, src, 0, 1, 1); } + push->cur[0] = mode; + push->cur += 5; } static INLINE void -nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi, - uint32_t *base, uint32_t *size) +nvc0_user_vbuf_range(struct nvc0_context *nvc0, int vbi, + uint32_t *base, uint32_t *size) { if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) { - /* TODO: use min and max instance divisor to get a proper range */ - *base = 0; - *size = nvc0->vtxbuf[vbi].buffer->width0; + const uint32_t div = nvc0->vertex->min_instance_div[vbi]; + *base = nvc0->instance_off * nvc0->vtxbuf[vbi].stride; + *size = (nvc0->instance_max / div) * nvc0->vtxbuf[vbi].stride + + nvc0->vertex->vb_access_size[vbi]; } else { /* NOTE: if there are user buffers, we *must* have index bounds */ assert(nvc0->vb_elt_limit != ~0); @@ -200,100 +213,196 @@ nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi, } } -/* Return whether to use alternative vertex submission mode (translate), - * and validate vertex buffers and upload user arrays (if normal mode). - */ -static uint8_t -nvc0_prevalidate_vbufs(struct nvc0_context *nvc0, unsigned limits[]) +static INLINE void +nvc0_release_user_vbufs(struct nvc0_context *nvc0) { - const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART; - struct nouveau_bo *bo; - struct pipe_vertex_buffer *vb; - struct nv04_resource *buf; - int i; - uint32_t base, size; - - nvc0->vbo_user = 0; - - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); - - for (i = 0; i < nvc0->num_vtxbufs; ++i) { - vb = &nvc0->vtxbuf[i]; - limits[i] = 0; - if (!vb->stride) - continue; - buf = nv04_resource(vb->buffer); - - if (!nouveau_resource_mapped_by_gpu(vb->buffer)) { - if (nvc0->vbo_push_hint) - return 1; - nvc0->base.vbo_dirty = TRUE; - - if (buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) { - assert(vb->stride > vb->buffer_offset); - nvc0->vbo_user |= 1 << i; - nvc0_vbuf_range(nvc0, i, &base, &size); - limits[i] = base + size - 1; - bo = nouveau_scratch_data(&nvc0->base, buf, base, size); - if (bo) - BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo); - continue; - } else { - nouveau_buffer_migrate(&nvc0->base, buf, NOUVEAU_BO_GART); - } - } - BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD); + if (nvc0->vbo_user) { + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP); + nouveau_scratch_done(&nvc0->base); } - return 0; } static void nvc0_update_user_vbufs(struct nvc0_context *nvc0) { - const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART; - struct nouveau_bo *bo; + uint64_t address[PIPE_MAX_ATTRIBS]; struct nouveau_pushbuf *push = nvc0->base.pushbuf; - uint32_t base, offset, size; int i; uint32_t written = 0; PUSH_SPACE(push, nvc0->vertex->num_elements * 8); - for (i = 0; i < nvc0->vertex->num_elements; ++i) { struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe; - const int b = ve->vertex_buffer_index; + const unsigned b = ve->vertex_buffer_index; struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b]; - struct nv04_resource *buf = nv04_resource(vb->buffer); + uint32_t base, size; - if (!(nvc0->vbo_user & (1 << b)) || !vb->stride) + if (!(nvc0->vbo_user & (1 << b))) + continue; + if (!vb->stride) { + nvc0_set_constant_vertex_attrib(nvc0, i); continue; - nvc0_vbuf_range(nvc0, b, &base, &size); + } + nvc0_user_vbuf_range(nvc0, b, &base, &size); if (!(written & (1 << b))) { + struct nouveau_bo *bo; + const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART; written |= 1 << b; - bo = nouveau_scratch_data(&nvc0->base, buf, base, size); + address[b] = nouveau_scratch_data(&nvc0->base, vb->user_buffer, + base, size, &bo); if (bo) BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo); } - offset = vb->buffer_offset + ve->src_offset; BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5); PUSH_DATA (push, i); - PUSH_DATAh(push, buf->address + base + size - 1); - PUSH_DATA (push, buf->address + base + size - 1); - PUSH_DATAh(push, buf->address + offset); - PUSH_DATA (push, buf->address + offset); + PUSH_DATAh(push, address[b] + base + size - 1); + PUSH_DATA (push, address[b] + base + size - 1); + PUSH_DATAh(push, address[b] + ve->src_offset); + PUSH_DATA (push, address[b] + ve->src_offset); } nvc0->base.vbo_dirty = TRUE; } -static INLINE void -nvc0_release_user_vbufs(struct nvc0_context *nvc0) +static void +nvc0_update_user_vbufs_shared(struct nvc0_context *nvc0) { - if (nvc0->vbo_user) { - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP); - nouveau_scratch_done(&nvc0->base); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + uint32_t mask = nvc0->vbo_user & ~nvc0->constant_vbos; + + PUSH_SPACE(push, nvc0->num_vtxbufs * 8); + while (mask) { + struct nouveau_bo *bo; + const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART; + uint64_t address; + uint32_t base, size; + const int b = ffs(mask) - 1; + mask &= ~(1 << b); + + nvc0_user_vbuf_range(nvc0, b, &base, &size); + + address = nouveau_scratch_data(&nvc0->base, nvc0->vtxbuf[b].user_buffer, + base, size, &bo); + if (bo) + BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo); + + BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5); + PUSH_DATA (push, b); + PUSH_DATAh(push, address + base + size - 1); + PUSH_DATA (push, address + base + size - 1); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); + } + + mask = nvc0->state.constant_elts; + while (mask) { + int i = ffs(mask) - 1; + mask &= ~(1 << i); + nvc0_set_constant_vertex_attrib(nvc0, i); + } +} + +static void +nvc0_validate_vertex_buffers(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const struct nvc0_vertex_stateobj *vertex = nvc0->vertex; + uint32_t refd = 0; + unsigned i; + + PUSH_SPACE(push, vertex->num_elements * 8); + for (i = 0; i < vertex->num_elements; ++i) { + const struct nvc0_vertex_element *ve; + const struct pipe_vertex_buffer *vb; + struct nv04_resource *res; + unsigned b; + unsigned limit, offset; + + if (nvc0->state.constant_elts & (1 << i)) + continue; + ve = &vertex->element[i]; + b = ve->pipe.vertex_buffer_index; + vb = &nvc0->vtxbuf[b]; + + if (!vb->buffer) { + if (vb->stride) { + if (ve->pipe.instance_divisor) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_DIVISOR(i)), 1); + PUSH_DATA (push, ve->pipe.instance_divisor); + } + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1); + PUSH_DATA (push, (1 << 12) | vb->stride); + } + /* address/value set in nvc0_update_user_vbufs */ + continue; + } + res = nv04_resource(vb->buffer); + offset = ve->pipe.src_offset + vb->buffer_offset; + limit = vb->buffer->width0 - 1; + + if (unlikely(ve->pipe.instance_divisor)) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4); + PUSH_DATA (push, (1 << 12) | vb->stride); + PUSH_DATAh(push, res->address + offset); + PUSH_DATA (push, res->address + offset); + PUSH_DATA (push, ve->pipe.instance_divisor); + } else { + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3); + PUSH_DATA (push, (1 << 12) | vb->stride); + PUSH_DATAh(push, res->address + offset); + PUSH_DATA (push, res->address + offset); + } + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); + PUSH_DATAh(push, res->address + limit); + PUSH_DATA (push, res->address + limit); + + if (!(refd & (1 << b))) { + refd |= 1 << b; + BCTX_REFN(nvc0->bufctx_3d, VTX, res, RD); + } + } + if (nvc0->vbo_user) + nvc0_update_user_vbufs(nvc0); +} + +static void +nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned b; + const uint32_t mask = nvc0->vbo_user; + + PUSH_SPACE(push, nvc0->num_vtxbufs * 8); + for (b = 0; b < nvc0->num_vtxbufs; ++b) { + struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b]; + struct nv04_resource *buf; + uint32_t offset, limit; + + if (mask & (1 << b)) { + if (vb->stride) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 1); + PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + } + /* address/value set in nvc0_update_user_vbufs_shared */ + continue; + } + buf = nv04_resource(vb->buffer); + offset = vb->buffer_offset; + limit = buf->base.width0 - 1; + + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 3); + PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2); + PUSH_DATAh(push, buf->address + limit); + PUSH_DATA (push, buf->address + limit); + + BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD); } + if (nvc0->vbo_user) + nvc0_update_user_vbufs_shared(nvc0); } void @@ -301,20 +410,19 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_vertex_stateobj *vertex = nvc0->vertex; - struct pipe_vertex_buffer *vb; struct nvc0_vertex_element *ve; uint32_t const_vbos; unsigned i; - unsigned limits[PIPE_MAX_ATTRIBS]; uint8_t vbo_mode; boolean update_vertex; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); + if (unlikely(vertex->need_conversion) || unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) { - nvc0->vbo_user = 0; vbo_mode = 3; } else { - vbo_mode = nvc0_prevalidate_vbufs(nvc0, limits); + vbo_mode = (nvc0->vbo_user && nvc0->vbo_push_hint) ? 1 : 0; } const_vbos = vbo_mode ? 0 : nvc0->constant_vbos; @@ -386,38 +494,10 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) if (nvc0->state.vbo_mode) /* using translate, don't set up arrays here */ return; - PUSH_SPACE(push, vertex->num_elements * 8); - for (i = 0; i < vertex->num_elements; ++i) { - struct nv04_resource *res; - unsigned limit, offset; - - if (nvc0->state.constant_elts & (1 << i)) - continue; - ve = &vertex->element[i]; - vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index]; - - res = nv04_resource(vb->buffer); - offset = ve->pipe.src_offset + vb->buffer_offset; - limit = limits[ve->pipe.vertex_buffer_index]; - if (!limit) - limit = vb->buffer->width0 - 1; - - if (unlikely(ve->pipe.instance_divisor)) { - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4); - PUSH_DATA (push, (1 << 12) | vb->stride); - PUSH_DATAh(push, res->address + offset); - PUSH_DATA (push, res->address + offset); - PUSH_DATA (push, ve->pipe.instance_divisor); - } else { - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3); - PUSH_DATA (push, (1 << 12) | vb->stride); - PUSH_DATAh(push, res->address + offset); - PUSH_DATA (push, res->address + offset); - } - BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); - PUSH_DATAh(push, res->address + limit); - PUSH_DATA (push, res->address + limit); - } + if (vertex->shared_slots) + nvc0_validate_vertex_buffers_shared(nvc0); + else + nvc0_validate_vertex_buffers(nvc0); } void @@ -710,6 +790,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */ nvc0->vb_elt_first = info->min_index + info->index_bias; nvc0->vb_elt_limit = info->max_index - info->min_index; + nvc0->instance_off = info->start_instance; + nvc0->instance_max = info->instance_count - 1; /* For picking only a few vertices from a large user buffer, push is better, * if index count is larger and we expect repeated vertices, suggest upload. @@ -717,20 +799,18 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nvc0->vbo_push_hint = info->indexed && (nvc0->vb_elt_limit >= (info->count * 2)); - /* Check whether we want to switch vertex-submission mode, - * and if not, update user vbufs. - */ - if (!(nvc0->dirty & NVC0_NEW_ARRAYS)) { - if (nvc0->vbo_push_hint) { - if (nvc0->vbo_user) - nvc0->dirty |= NVC0_NEW_ARRAYS; /* switch to translate mode */ - } else - if (nvc0->state.vbo_mode == 1) { - nvc0->dirty |= NVC0_NEW_ARRAYS; /* back to normal mode */ + /* Check whether we want to switch vertex-submission mode. */ + if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_ARRAYS | NVC0_NEW_VERTEX))) { + if (nvc0->vbo_push_hint != !!nvc0->state.vbo_mode) + if (nvc0->state.vbo_mode != 3) + nvc0->dirty |= NVC0_NEW_ARRAYS; + + if (!(nvc0->dirty & NVC0_NEW_ARRAYS) && nvc0->state.vbo_mode == 0) { + if (nvc0->vertex->shared_slots) + nvc0_update_user_vbufs_shared(nvc0); + else + nvc0_update_user_vbufs(nvc0); } - if (nvc0->vbo_user && - !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS))) - nvc0_update_user_vbufs(nvc0); } /* 8 as minimum to avoid immediate double validation of new buffers */ @@ -743,7 +823,6 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) push->kick_notify = nvc0_default_kick_notify; return; } - nvc0_update_constant_vertex_attribs(nvc0); /* space for base instance, flush, and prim restart */ PUSH_SPACE(push, 8); diff --git a/src/gallium/drivers/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nvc0/nvc0_vbo_translate.c index 6317c21..42d9e05 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo_translate.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo_translate.c @@ -65,8 +65,11 @@ nvc0_vertex_configure_translate(struct nvc0_context *nvc0, int32_t index_bias) const uint8_t *map; const struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i]; - map = nouveau_resource_map_offset(&nvc0->base, - nv04_resource(vb->buffer), vb->buffer_offset, NOUVEAU_BO_RD); + if (likely(!vb->buffer)) + map = (const uint8_t *)vb->user_buffer; + else + map = nouveau_resource_map_offset(&nvc0->base, + nv04_resource(vb->buffer), vb->buffer_offset, NOUVEAU_BO_RD); if (index_bias && !unlikely(nvc0->vertex->instance_bufs & (1 << i))) map += (intptr_t)index_bias * vb->stride; -- 2.7.4