From: Marek Olšák Date: Wed, 1 Nov 2017 23:00:53 +0000 (+0100) Subject: gallium/u_threaded: don't map big VRAM buffers for the first upload directly X-Git-Tag: upstream/18.1.0~4491 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4b0dc098b2561c07c59f7dab2813640a25789bf1;p=platform%2Fupstream%2Fmesa.git gallium/u_threaded: don't map big VRAM buffers for the first upload directly This improves Paraview "many spheres" performance 4x along with the radeonsi commit. Reviewed-by: Nicolai Hähnle --- diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c index 0f23258..ccce12b 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.c +++ b/src/gallium/auxiliary/util/u_threaded_context.c @@ -1284,6 +1284,20 @@ tc_improve_map_buffer_flags(struct threaded_context *tc, if (usage & tc_flags) return usage; + /* Use the staging upload if it's preferred. */ + if (usage & (PIPE_TRANSFER_DISCARD_RANGE | + PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) && + !(usage & PIPE_TRANSFER_PERSISTENT) && + /* Try not to decrement the counter if it's not positive. Still racy, + * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */ + tres->max_forced_staging_uploads > 0 && + p_atomic_dec_return(&tres->max_forced_staging_uploads) >= 0) { + usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | + PIPE_TRANSFER_UNSYNCHRONIZED); + + return usage | tc_flags | PIPE_TRANSFER_DISCARD_RANGE; + } + /* Sparse buffers can't be mapped directly and can't be reallocated * (fully invalidated). That may just be a radeonsi limitation, but * the threaded context must obey it with radeonsi. diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h index 8977b03..ac7bc3d 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.h +++ b/src/gallium/auxiliary/util/u_threaded_context.h @@ -241,6 +241,12 @@ struct threaded_resource { * pointers. */ bool is_shared; bool is_user_ptr; + + /* If positive, prefer DISCARD_RANGE with a staging buffer over any other + * method of CPU access when map flags allow it. Useful for buffers that + * are too large for the visible VRAM window. + */ + int max_forced_staging_uploads; }; struct threaded_transfer { diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index 67daaa4..92521f4 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -191,10 +191,15 @@ void si_init_resource_fields(struct r600_common_screen *rscreen, res->vram_usage = 0; res->gart_usage = 0; - if (res->domains & RADEON_DOMAIN_VRAM) + if (res->domains & RADEON_DOMAIN_VRAM) { res->vram_usage = size; - else if (res->domains & RADEON_DOMAIN_GTT) + + res->b.max_forced_staging_uploads = + rscreen->info.has_dedicated_vram && + size >= rscreen->info.vram_vis_size / 4 ? 1 : 0; + } else if (res->domains & RADEON_DOMAIN_GTT) { res->gart_usage = size; + } } bool si_alloc_resource(struct r600_common_screen *rscreen, @@ -289,6 +294,7 @@ void si_replace_buffer_storage(struct pipe_context *ctx, pb_reference(&rdst->buf, rsrc->buf); rdst->gpu_address = rsrc->gpu_address; rdst->b.b.bind = rsrc->b.b.bind; + rdst->b.max_forced_staging_uploads = rsrc->b.max_forced_staging_uploads; rdst->flags = rsrc->flags; assert(rdst->vram_usage == rsrc->vram_usage);