From fb1d0bfd47fb8790e0b350a0fad7bc0af39e70f4 Mon Sep 17 00:00:00 2001 From: Pauli Nieminen Date: Tue, 11 Aug 2009 23:43:35 +0300 Subject: [PATCH] r200: Prevent flush in middle of rendering. Patch adds a prediction function that tries to predict the emit size as the smallest possible value that is guaranteed to be higher than the worst case scenario in the rendering pipeline. State emit size prediction code is in place, but the fix for emit sizes is included in the next patch. Signed-off-by: Pauli Nieminen --- src/mesa/drivers/dri/r200/r200_ioctl.h | 8 +++-- src/mesa/drivers/dri/r200/r200_tcl.c | 54 +++++++++++++++++++++++++++++ src/mesa/drivers/dri/radeon/radeon_common.c | 23 ++++++++++++ src/mesa/drivers/dri/radeon/radeon_common.h | 1 + 4 files changed, 83 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.h b/src/mesa/drivers/dri/r200/r200_ioctl.h index 2a4b8a1..f6419f5 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.h +++ b/src/mesa/drivers/dri/r200/r200_ioctl.h @@ -125,10 +125,12 @@ static INLINE int R200_DB_STATECHANGE( * are available, you will also be adding an rmesa->state.max_state_size because * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts. 
*/ -#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int)) -#define VERT_AOS_BUFSZ (5 * sizeof(int)) +#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2) + nr*2)) +#define VERT_AOS_BUFSZ (5) #define ELTS_BUFSZ(nr) (12 + nr * 2) -#define VBUF_BUFSZ (3 * sizeof(int)) +#define VBUF_BUFSZ (3) +#define SCISSOR_BUFSZ (8) +#define INDEX_BUFSZ (8+2) static inline uint32_t cmdpacket3(int cmd_type) { diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index ca9a8db..455a4bb 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -206,6 +206,7 @@ static void r200EmitPrim( GLcontext *ctx, r200EmitPrim( ctx, prim, hwprim, start, count ); \ (void) rmesa; } while (0) +#define MAX_CONVERSION_SIZE 40 /* Try & join small primitives */ #if 0 @@ -368,6 +369,58 @@ r200ComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ) } } +/** + * Predict total emit size for next rendering operation so there is no flush in middle of rendering + * Prediction has to aim towards the best possible value that is worse than worst case scenario + */ +static void r200EnsureEmitSize( GLcontext * ctx , GLubyte* vimap_rev ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint space_required; + GLuint nr_aos = 0; + int i; + /* predict number of aos to emit */ + for (i = 0; i < 15; ++i) + { + if (vimap_rev[i] != 255) + { + ++nr_aos; + } + } + + { + /* count the prediction for state size */ + space_required = radeonCountEmitSize( &rmesa->radeon ); + /* vtx may be changed in r200EmitArrays so account for it if not dirty */ + if (!rmesa->hw.vtx.dirty) + space_required += rmesa->hw.vtx.check(rmesa->radeon.glCtx, &rmesa->hw.vtx); + /* predict size for elements */ + for (i = 0; i < VB->PrimitiveCount; ++i) + { + if (!VB->Primitive[i].count) + continue; + /* If primitive.count is less than MAX_CONVERSION_SIZE + rendering code may 
decide convert to elts. + In that case we have to make pessimistic prediction. + and use larger of 2 paths. */ + const GLuint elts = ELTS_BUFSZ(nr_aos); + const GLuint index = INDEX_BUFSZ; + const GLuint vbuf = VBUF_BUFSZ; + if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE) + || vbuf > index + elts) + space_required += vbuf; + else + space_required += index + elts; + space_required += AOS_BUFSZ(nr_aos); + } + space_required += SCISSOR_BUFSZ; + } + /* flush the buffer in case we need more than is left. */ + rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__); +} + /**********************************************************************/ /* Render pipeline stage */ @@ -482,6 +535,7 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, /* Do the actual work: */ radeonReleaseArrays( ctx, ~0 /* stage->changed_inputs */ ); + r200EnsureEmitSize( ctx, vimap_rev ); r200EmitArrays( ctx, vimap_rev ); rmesa->tcl.Elts = VB->Elts; diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index b5b4fed..20cf1f9 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -946,6 +946,29 @@ static void radeon_print_state_atom_kmm(radeonContextPtr radeon, struct radeon_s } } +/** + * Count total size for next state emit. 
+ **/ +GLuint radeonCountEmitSize(radeonContextPtr radeon) +{ + struct radeon_state_atom *atom; + int dwords = 0; + /* check if we are going to emit full state */ + if (radeon->cmdbuf.cs->cdw && !radeon->hw.all_dirty) { + if (!radeon->hw.is_dirty) + return dwords; + foreach(atom, &radeon->hw.atomlist) { + if (atom->dirty) + dwords += atom->check(radeon->glCtx, atom); + } + } else { + foreach(atom, &radeon->hw.atomlist) { + dwords += atom->check(radeon->glCtx, atom); + } + } + return dwords; +} + static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean dirty) { BATCH_LOCALS(radeon); diff --git a/src/mesa/drivers/dri/radeon/radeon_common.h b/src/mesa/drivers/dri/radeon/radeon_common.h index cebae18..6e81100 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.h +++ b/src/mesa/drivers/dri/radeon/radeon_common.h @@ -24,6 +24,7 @@ void radeonUpdatePageFlipping(radeonContextPtr rmesa); void radeonFlush(GLcontext *ctx); void radeonFinish(GLcontext * ctx); void radeonEmitState(radeonContextPtr radeon); +GLuint radeonCountEmitSize(radeonContextPtr radeon); void radeon_clear_tris(GLcontext *ctx, GLbitfield mask); -- 2.7.4