From 0926a5f4a110926711d0f8a4eb297dc772b09aa2 Mon Sep 17 00:00:00 2001 From: Pauli Nieminen Date: Sat, 29 Aug 2009 00:46:08 +0300 Subject: [PATCH] radeon: Fix swtcl emit prediction. The problem was finding the correct place to run the prediction. The only place that is called for every primitive is ALLOC_VERTS, so we have to do the prediction there before allocation. --- src/mesa/drivers/dri/r200/r200_swtcl.c | 19 ++++++--- src/mesa/drivers/dri/r300/r300_swtcl.c | 63 +++++++++++++++++------------- src/mesa/drivers/dri/radeon/radeon_dma.c | 13 +++--- src/mesa/drivers/dri/radeon/radeon_swtcl.c | 20 ++++++---- 4 files changed, 70 insertions(+), 45 deletions(-) diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c index 3d4e701..0e7d24e 100644 --- a/src/mesa/drivers/dri/r200/r200_swtcl.c +++ b/src/mesa/drivers/dri/r200/r200_swtcl.c @@ -201,9 +201,10 @@ static void r200SetVertexFormat( GLcontext *ctx ) } } -static void r200_predict_emit_size( GLcontext *ctx ) +static void r200_predict_emit_size( r200ContextPtr rmesa ) { - r200ContextPtr rmesa = R200_CONTEXT( ctx ); + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s\n", __func__); const int vertex_array_size = 7; const int prim_size = 3; if (!rmesa->radeon.swtcl.emit_prediction) { @@ -226,7 +227,6 @@ static void r200RenderStart( GLcontext *ctx ) r200SetVertexFormat( ctx ); if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s\n", __func__); - r200_predict_emit_size( ctx ); } @@ -310,7 +310,6 @@ void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset) rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction ); rmesa->radeon.swtcl.emit_prediction = 0; - r200_predict_emit_size( ctx ); } @@ -358,11 +357,21 @@ static void r200ResetLineStipple( GLcontext *ctx ); #define HAVE_POLYGONS 1 #define HAVE_ELTS 0 +static void* r200_alloc_verts( r200ContextPtr rmesa, GLuint n, GLuint size) +{ + void *rv; + do { + r200_predict_emit_size( rmesa ); + rv = rcommonAllocDmaLowVerts( &rmesa->radeon, n, 
size * 4 ); + } while(!rv); + return rv; +} + #undef LOCAL_VARS #undef ALLOC_VERTS #define CTX_ARG r200ContextPtr rmesa #define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size -#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ) +#define ALLOC_VERTS( n, size ) r200_alloc_verts(rmesa, n, size) #define LOCAL_VARS \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ const char *r200verts = (char *)rmesa->radeon.swtcl.verts; diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index ca17f30..e4a56ca 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -43,9 +43,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define EMIT_ATTR( ATTR, STYLE ) \ do { \ - rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ - rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ - rmesa->radeon.swtcl.vertex_attr_count++; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ + rmesa->radeon.swtcl.vertex_attr_count++; \ } while (0) #define EMIT_PAD( N ) \ @@ -242,6 +242,27 @@ static void r300PrepareVertices(GLcontext *ctx) rmesa->radeon.swtcl.vertex_size /= 4; } +static void r300_predict_emit_size( r300ContextPtr rmesa ) +{ + if (!rmesa->radeon.swtcl.emit_prediction) { + const int vertex_size = 7; + const int prim_size = 3; + const int cache_flush_size = 4; + const int state_size = radeonCountStateEmitSize(&rmesa->radeon); + + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, + state_size + + + vertex_size + prim_size, + __FUNCTION__)) + rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon); + else + rmesa->radeon.swtcl.emit_prediction = state_size; + + rmesa->radeon.swtcl.emit_prediction += rmesa->radeon.cmdbuf.cs->cdw + + vertex_size + prim_size + 
cache_flush_size * 2; + } +} + static GLuint reduced_prim[] = { GL_POINTS, @@ -275,11 +296,21 @@ static void r300RasterPrimitive( GLcontext *ctx, GLuint prim ); #define HAVE_POLYGONS 1 #define HAVE_ELTS 1 +static void* r300_alloc_verts(r300ContextPtr rmesa, GLuint n, GLuint size) +{ + void *rv; + do { + r300_predict_emit_size( rmesa ); + rv = rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ); + } while (!rv); + return rv; +} + #undef LOCAL_VARS #undef ALLOC_VERTS #define CTX_ARG r300ContextPtr rmesa #define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size -#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ) +#define ALLOC_VERTS( n, size ) r300_alloc_verts(rmesa, n, size); #define LOCAL_VARS \ r300ContextPtr rmesa = R300_CONTEXT(ctx); \ const char *r300verts = (char *)rmesa->radeon.swtcl.verts; @@ -490,28 +521,6 @@ static void r300ChooseRenderState( GLcontext *ctx ) rmesa->radeon.swtcl.RenderIndex = index; } } -static void r300_predict_emit_size( GLcontext *ctx ) -{ - r300ContextPtr rmesa = R300_CONTEXT( ctx ); - if (!rmesa->radeon.swtcl.emit_prediction) { - const int vertex_size = 7; - const int prim_size = 3; - const int cache_flush_size = 4; - const int state_size = radeonCountStateEmitSize(&rmesa->radeon); - - if (rcommonEnsureCmdBufSpace(&rmesa->radeon, - state_size + - + vertex_size + prim_size, - __FUNCTION__)) - rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon); - else - rmesa->radeon.swtcl.emit_prediction = state_size; - - rmesa->radeon.swtcl.emit_prediction += rmesa->radeon.cmdbuf.cs->cdw - + vertex_size + prim_size + cache_flush_size * 2; - } -} - void r300RenderStart(GLcontext *ctx) { @@ -529,7 +538,6 @@ void r300RenderStart(GLcontext *ctx) r300UpdateShaderStates(rmesa); - r300_predict_emit_size( ctx ); /* investigate if we can put back flush optimisation if needed */ if (rmesa->radeon.dma.flush != NULL) { @@ -670,6 +678,5 @@ void r300_swtcl_flush(GLcontext *ctx, uint32_t 
current_offset) " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction ); rmesa->radeon.swtcl.emit_prediction = 0; - r300_predict_emit_size( ctx ); COMMIT_BATCH(); } diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c index 7aa5967..386262b 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.c +++ b/src/mesa/drivers/dri/radeon/radeon_dma.c @@ -182,9 +182,6 @@ void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) fprintf(stderr, "%s\n", __FUNCTION__); - if (rmesa->dma.flush) { - rmesa->dma.flush(rmesa->glCtx); - } /* unmap old reserved bo */ if (!is_empty_list(&rmesa->dma.reserved)) @@ -430,9 +427,15 @@ rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) void *head; if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); - if (is_empty_list(&rmesa->dma.reserved) - || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) { + if(is_empty_list(&rmesa->dma.reserved) + ||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) { + if (rmesa->dma.flush) { + rmesa->dma.flush(rmesa->glCtx); + } + radeonRefillCurrentDmaRegion(rmesa, bytes); + + return NULL; } if (!rmesa->dma.flush) { diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index 7358e22..32df569 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -220,9 +220,8 @@ static void radeonSetVertexFormat( GLcontext *ctx ) } } -static void radeon_predict_emit_size( GLcontext* ctx ) +static void radeon_predict_emit_size( r100ContextPtr rmesa ) { - r100ContextPtr rmesa = R100_CONTEXT( ctx ); if (!rmesa->radeon.swtcl.emit_prediction) { const int state_size = radeonCountStateEmitSize( &rmesa->radeon ); @@ -251,7 +250,6 @@ static void radeonRenderStart( GLcontext *ctx ) if 
(rmesa->radeon.dma.flush != 0 && rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim) rmesa->radeon.dma.flush( ctx ); - radeon_predict_emit_size( ctx ); } @@ -324,7 +322,6 @@ void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset) " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction ); - radeon_predict_emit_size( ctx ); rmesa->radeon.swtcl.emit_prediction = 0; @@ -369,6 +366,16 @@ radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim ) // assert(rmesa->radeon.dma.current.ptr == rmesa->radeon.dma.current.start); } +static void* radeon_alloc_verts( r100ContextPtr rmesa , GLuint nr, GLuint size ) +{ + void *rv; + do { + radeon_predict_emit_size( rmesa ); + rv = rcommonAllocDmaLowVerts( &rmesa->radeon, nr, size ); + } while (!rv); + return rv; +} + #define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx) #define INIT( prim ) radeonDmaPrimitive( rmesa, prim ) #define FLUSH() RADEON_NEWPRIM( rmesa ) @@ -376,8 +383,7 @@ radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim ) // (((int)rmesa->radeon.dma.current.end - (int)rmesa->radeon.dma.current.ptr) / (rmesa->radeon.swtcl.vertex_size*4)) #define GET_SUBSEQUENT_VB_MAX_VERTS() \ ((RADEON_BUFFER_SIZE) / (rmesa->radeon.swtcl.vertex_size*4)) -#define ALLOC_VERTS( nr ) \ - rcommonAllocDmaLowVerts( &rmesa->radeon, nr, rmesa->radeon.swtcl.vertex_size * 4 ) +#define ALLOC_VERTS( nr ) radeon_alloc_verts( rmesa, nr, rmesa->radeon.swtcl.vertex_size * 4 ) #define EMIT_VERTS( ctx, j, nr, buf ) \ _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf) @@ -470,7 +476,7 @@ static void radeonResetLineStipple( GLcontext *ctx ); #undef ALLOC_VERTS #define CTX_ARG r100ContextPtr rmesa #define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size -#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, (size) * 4 ) +#define ALLOC_VERTS( n, size ) radeon_alloc_verts( rmesa, n, (size) * 4 ) #undef LOCAL_VARS #define LOCAL_VARS \ r100ContextPtr rmesa = 
R100_CONTEXT(ctx); \ -- 2.7.4