From 6caac3ecb8bc32d92c35fdb1f0a67541ffa8af29 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 3 Apr 2011 21:25:40 +0200 Subject: [PATCH] r300g: do not wait for a busy BO if neither GPU nor CPU is changing it Improves frame rate in apps with at least one user vertex buffer and a hw index buffer. --- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 47 ++++++++++++++++++++++----- src/gallium/winsys/radeon/drm/radeon_drm_bo.h | 7 ++++ src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 9 ++++- src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 24 ++++++++++++-- 4 files changed, 75 insertions(+), 12 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index d90903a..3d0fcea 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -99,20 +99,27 @@ static void radeon_bo_wait(struct r300_winsys_bo *_buf) args.handle = bo->handle; while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE, &args, sizeof(args)) == -EBUSY); + + bo->busy_for_write = FALSE; } static boolean radeon_bo_is_busy(struct r300_winsys_bo *_buf) { struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf)); struct drm_radeon_gem_busy args = {}; + boolean busy; if (p_atomic_read(&bo->num_active_ioctls)) { return TRUE; } args.handle = bo->handle; - return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY, + busy = drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY, &args, sizeof(args)) != 0; + + if (!busy) + bo->busy_for_write = FALSE; + return busy; } static void radeon_bo_destroy(struct pb_buffer *_buf) @@ -141,6 +148,9 @@ static unsigned get_pb_usage_from_transfer_flags(enum pipe_transfer_usage usage) { unsigned res = 0; + if (usage & PIPE_TRANSFER_WRITE) + res |= PB_USAGE_CPU_WRITE; + if (usage & PIPE_TRANSFER_DONTBLOCK) res |= PB_USAGE_DONTBLOCK; @@ -171,15 +181,36 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf, return NULL; } } else { - if (radeon_bo_is_referenced_by_cs(cs, bo)) { - cs->flush_cs(cs->flush_data, 0); + if (!(flags & PB_USAGE_CPU_WRITE)) { + /* Mapping for read. + * + * Since we are mapping for read, we don't need to wait + * if the GPU is using the buffer for read too + * (neither one is changing it). + * + * Only check whether the buffer is being used for write. */ + if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { + cs->flush_cs(cs->flush_data, 0); + radeon_bo_wait((struct r300_winsys_bo*)bo); + } else if (bo->busy_for_write) { + /* Update the busy_for_write field (done by radeon_bo_is_busy) + * and wait if needed. */ + if (radeon_bo_is_busy((struct r300_winsys_bo*)bo)) { + radeon_bo_wait((struct r300_winsys_bo*)bo); + } + } } else { - /* Try to avoid busy-waiting in radeon_bo_wait. */ - if (p_atomic_read(&bo->num_active_ioctls)) - radeon_drm_cs_sync_flush(cs); + /* Mapping for write. */ + if (radeon_bo_is_referenced_by_cs(cs, bo)) { + cs->flush_cs(cs->flush_data, 0); + } else { + /* Try to avoid busy-waiting in radeon_bo_wait. */ + if (p_atomic_read(&bo->num_active_ioctls)) + radeon_drm_cs_sync_flush(cs); + } + + radeon_bo_wait((struct r300_winsys_bo*)bo); } - - radeon_bo_wait((struct r300_winsys_bo*)bo); } } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h index e0247f2..b20a099 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h @@ -60,6 +60,13 @@ struct radeon_bo { * thread, is this bo referenced in? */ int num_active_ioctls; + /* Whether the buffer has been relocated for write and is busy since then. + * This field is updated in: + * - radeon_drm_cs_flush (to TRUE if it's relocated for write) + * - radeon_bo_is_busy (to FALSE if it's not busy) + * - radeon_bo_wait (to FALSE) */ + boolean busy_for_write; + boolean flinked; uint32_t flink; }; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 4adf4ad..a506bdc 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -377,9 +377,16 @@ static void radeon_drm_cs_flush(struct r300_winsys_cs *rcs, unsigned flags) cs->csc->chunks[0].length_dw = cs->base.cdw; - for (i = 0; i < crelocs; i++) + for (i = 0; i < crelocs; i++) { + /* Update the number of active asynchronous CS ioctls for the buffer. */ p_atomic_inc(&cs->csc->relocs_bo[i]->num_active_ioctls); + /* Update whether the buffer is busy for write. */ + if (cs->csc->relocs[i].write_domain) { + cs->csc->relocs_bo[i]->busy_for_write = TRUE; + } + } + if (cs->ws->num_cpus > 1 && debug_get_option_thread() && (flags & R300_FLUSH_ASYNC)) { cs->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, cs->csc); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index dfaa161..dc2050a 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -84,14 +84,32 @@ radeon_drm_cs(struct r300_winsys_cs *base) return (struct radeon_drm_cs*)base; } -static INLINE boolean radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs, - struct radeon_bo *bo) +static INLINE boolean +radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs, + struct radeon_bo *bo) { return bo->num_cs_references == bo->rws->num_cs || (bo->num_cs_references && radeon_get_reloc(cs->csc, bo) != -1); } -static INLINE boolean radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo) +static INLINE boolean +radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs, + struct radeon_bo *bo) +{ + int index; + + if (!bo->num_cs_references) + return FALSE; + + index = radeon_get_reloc(cs->csc, bo); + if (index == -1) + return FALSE; + + return cs->csc->relocs[index].write_domain != 0; +} + +static INLINE boolean +radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo) { return bo->num_cs_references; } -- 2.7.4