From bc4e0c486874ee8c6f2e616b91589a1da63164d8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mathias=20Fr=C3=B6hlich?= Date: Sun, 22 May 2016 14:10:19 +0200 Subject: [PATCH] vbo: Use a bitmask to track the active arrays in vbo_exec*. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The use of a bitmask makes functions iterating only active attributes less visible in profiles. v2: Use _mesa_bit_scan{,64} instead of open coding. v3: Use u_bit_scan{,64} instead of _mesa_bit_scan{,64}. Reviewed-by: Brian Paul Signed-off-by: Mathias Fröhlich --- src/mesa/vbo/vbo_exec.h | 1 + src/mesa/vbo/vbo_exec_api.c | 146 ++++++++++++++++++++++--------------------- src/mesa/vbo/vbo_exec_draw.c | 2 + 3 files changed, 79 insertions(+), 70 deletions(-) diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h index 27bff4a..5e20cf6 100644 --- a/src/mesa/vbo/vbo_exec.h +++ b/src/mesa/vbo/vbo_exec.h @@ -101,6 +101,7 @@ struct vbo_exec_context GLuint max_vert; /**< Max number of vertices allowed in buffer */ struct vbo_exec_copied_vtx copied; + GLbitfield64 enabled; /**< mask of enabled vbo arrays. */ GLubyte attrsz[VBO_ATTRIB_MAX]; /**< nr. of attrib components (1..4) */ GLenum attrtype[VBO_ATTRIB_MAX]; /**< GL_FLOAT, GL_DOUBLE, GL_INT, etc */ GLubyte active_sz[VBO_ATTRIB_MAX]; /**< attrib size (nr. 32-bit words) */ diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index 7534599..e02bb90 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -42,6 +42,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/api_arrayelt.h" #include "main/api_validate.h" #include "main/dispatch.h" +#include "util/bitscan.h" #include "vbo_context.h" #include "vbo_noop.h" @@ -167,54 +168,56 @@ static void vbo_exec_copy_to_current( struct vbo_exec_context *exec ) { struct gl_context *ctx = exec->ctx; struct vbo_context *vbo = vbo_context(ctx); - GLuint i; + GLbitfield64 enabled = exec->vtx.enabled & (~BITFIELD64_BIT(VBO_ATTRIB_POS)); - for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) { - if (exec->vtx.attrsz[i]) { - /* Note: the exec->vtx.current[i] pointers point into the - * ctx->Current.Attrib and ctx->Light.Material.Attrib arrays. - */ - GLfloat *current = (GLfloat *)vbo->currval[i].Ptr; - fi_type tmp[8]; /* space for doubles */ - int dmul = exec->vtx.attrtype[i] == GL_DOUBLE ? 2 : 1; - - if (exec->vtx.attrtype[i] == GL_DOUBLE) { - memset(tmp, 0, sizeof(tmp)); - memcpy(tmp, exec->vtx.attrptr[i], exec->vtx.attrsz[i] * sizeof(GLfloat)); - } else { - COPY_CLEAN_4V_TYPE_AS_UNION(tmp, - exec->vtx.attrsz[i], - exec->vtx.attrptr[i], - exec->vtx.attrtype[i]); - } + while (enabled) { + const int i = u_bit_scan64(&enabled); + + /* Note: the exec->vtx.current[i] pointers point into the + * ctx->Current.Attrib and ctx->Light.Material.Attrib arrays. + */ + GLfloat *current = (GLfloat *)vbo->currval[i].Ptr; + fi_type tmp[8]; /* space for doubles */ + int dmul = exec->vtx.attrtype[i] == GL_DOUBLE ? 2 : 1; + + assert(exec->vtx.attrsz[i]); + + if (exec->vtx.attrtype[i] == GL_DOUBLE) { + memset(tmp, 0, sizeof(tmp)); + memcpy(tmp, exec->vtx.attrptr[i], exec->vtx.attrsz[i] * sizeof(GLfloat)); + } else { + COPY_CLEAN_4V_TYPE_AS_UNION(tmp, + exec->vtx.attrsz[i], + exec->vtx.attrptr[i], + exec->vtx.attrtype[i]); + } - if (exec->vtx.attrtype[i] != vbo->currval[i].Type || - memcmp(current, tmp, 4 * sizeof(GLfloat) * dmul) != 0) { - memcpy(current, tmp, 4 * sizeof(GLfloat) * dmul); + if (exec->vtx.attrtype[i] != vbo->currval[i].Type || + memcmp(current, tmp, 4 * sizeof(GLfloat) * dmul) != 0) { + memcpy(current, tmp, 4 * sizeof(GLfloat) * dmul); - /* Given that we explicitly state size here, there is no need - * for the COPY_CLEAN above, could just copy 16 bytes and be - * done. The only problem is when Mesa accesses ctx->Current - * directly. - */ - /* Size here is in components - not bytes */ - vbo->currval[i].Size = exec->vtx.attrsz[i] / dmul; - vbo->currval[i]._ElementSize = vbo->currval[i].Size * sizeof(GLfloat) * dmul; - vbo->currval[i].Type = exec->vtx.attrtype[i]; - vbo->currval[i].Integer = - vbo_attrtype_to_integer_flag(exec->vtx.attrtype[i]); - vbo->currval[i].Doubles = - vbo_attrtype_to_double_flag(exec->vtx.attrtype[i]); - - /* This triggers rather too much recalculation of Mesa state - * that doesn't get used (eg light positions). - */ - if (i >= VBO_ATTRIB_MAT_FRONT_AMBIENT && - i <= VBO_ATTRIB_MAT_BACK_INDEXES) - ctx->NewState |= _NEW_LIGHT; - - ctx->NewState |= _NEW_CURRENT_ATTRIB; - } + /* Given that we explicitly state size here, there is no need + * for the COPY_CLEAN above, could just copy 16 bytes and be + * done. The only problem is when Mesa accesses ctx->Current + * directly. + */ + /* Size here is in components - not bytes */ + vbo->currval[i].Size = exec->vtx.attrsz[i] / dmul; + vbo->currval[i]._ElementSize = vbo->currval[i].Size * sizeof(GLfloat) * dmul; + vbo->currval[i].Type = exec->vtx.attrtype[i]; + vbo->currval[i].Integer = + vbo_attrtype_to_integer_flag(exec->vtx.attrtype[i]); + vbo->currval[i].Doubles = + vbo_attrtype_to_double_flag(exec->vtx.attrtype[i]); + + /* This triggers rather too much recalculation of Mesa state + * that doesn't get used (eg light positions). + */ + if (i >= VBO_ATTRIB_MAT_FRONT_AMBIENT && + i <= VBO_ATTRIB_MAT_BACK_INDEXES) + ctx->NewState |= _NEW_LIGHT; + + ctx->NewState |= _NEW_CURRENT_ATTRIB; } } @@ -311,6 +314,7 @@ vbo_exec_wrap_upgrade_vertex(struct vbo_exec_context *exec, exec->vtx.max_vert = vbo_compute_max_verts(exec); exec->vtx.vert_count = 0; exec->vtx.buffer_ptr = exec->vtx.buffer_map; + exec->vtx.enabled |= BITFIELD64_BIT(attr); if (unlikely(oldSize)) { /* Size changed, recalculate all the attrptr[] values @@ -345,34 +349,34 @@ vbo_exec_wrap_upgrade_vertex(struct vbo_exec_context *exec, if (unlikely(exec->vtx.copied.nr)) { fi_type *data = exec->vtx.copied.buffer; fi_type *dest = exec->vtx.buffer_ptr; - GLuint j; assert(exec->vtx.buffer_ptr == exec->vtx.buffer_map); for (i = 0 ; i < exec->vtx.copied.nr ; i++) { - for (j = 0 ; j < VBO_ATTRIB_MAX ; j++) { + GLbitfield64 enabled = exec->vtx.enabled; + while (enabled) { + const int j = u_bit_scan64(&enabled); GLuint sz = exec->vtx.attrsz[j]; - - if (sz) { - GLint old_offset = old_attrptr[j] - exec->vtx.vertex; - GLint new_offset = exec->vtx.attrptr[j] - exec->vtx.vertex; - - if (j == attr) { - if (oldSize) { - fi_type tmp[4]; - COPY_CLEAN_4V_TYPE_AS_UNION(tmp, oldSize, - data + old_offset, - exec->vtx.attrtype[j]); - COPY_SZ_4V(dest + new_offset, newSize, tmp); - } else { - fi_type *current = (fi_type *)vbo->currval[j].Ptr; - COPY_SZ_4V(dest + new_offset, sz, current); - } - } - else { - COPY_SZ_4V(dest + new_offset, sz, data + old_offset); - } - } + GLint old_offset = old_attrptr[j] - exec->vtx.vertex; + GLint new_offset = exec->vtx.attrptr[j] - exec->vtx.vertex; + + assert(sz); + + if (j == attr) { + if (oldSize) { + fi_type tmp[4]; + COPY_CLEAN_4V_TYPE_AS_UNION(tmp, oldSize, + data + old_offset, + exec->vtx.attrtype[j]); + COPY_SZ_4V(dest + new_offset, newSize, tmp); + } else { + fi_type *current = (fi_type *)vbo->currval[j].Ptr; + COPY_SZ_4V(dest + new_offset, sz, current); + } + } + else { + COPY_SZ_4V(dest + new_offset, sz, data + old_offset); + } } data += old_vtx_size; @@ -1145,6 +1149,7 @@ void vbo_exec_vtx_init( struct vbo_exec_context *exec ) vbo_exec_vtxfmt_init( exec ); _mesa_noop_vtxfmt_init(&exec->vtxfmt_noop); + exec->vtx.enabled = 0; for (i = 0 ; i < VBO_ATTRIB_MAX ; i++) { assert(i < ARRAY_SIZE(exec->vtx.attrsz)); exec->vtx.attrsz[i] = 0; @@ -1273,9 +1278,10 @@ void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags ) static void reset_attrfv( struct vbo_exec_context *exec ) { - GLuint i; + while (exec->vtx.enabled) { + const int i = u_bit_scan64(&exec->vtx.enabled); + assert(exec->vtx.attrsz[i]); - for (i = 0 ; i < VBO_ATTRIB_MAX ; i++) { exec->vtx.attrsz[i] = 0; exec->vtx.attrtype[i] = GL_FLOAT; exec->vtx.active_sz[i] = 0; diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 0d42618..8d1b2c0 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -214,6 +214,8 @@ vbo_exec_bind_arrays( struct gl_context *ctx ) exec->vtx.attrsz[VERT_ATTRIB_GENERIC0] = exec->vtx.attrsz[0]; exec->vtx.attrptr[VERT_ATTRIB_GENERIC0] = exec->vtx.attrptr[0]; exec->vtx.attrsz[0] = 0; + exec->vtx.enabled &= (~BITFIELD64_BIT(VBO_ATTRIB_POS)); + exec->vtx.enabled |= BITFIELD64_BIT(VBO_ATTRIB_GENERIC0); } break; default: -- 2.7.4