From 965c6445ad419aa49302f75db1d99345708c5aae Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 23 Jan 2021 17:21:44 -0500 Subject: [PATCH] winsys/amdgpu,radeonsi: add HUD counters for how much memory is wasted by slabs Slabs always allocate the next power of two size from their pools. This wastes memory if the size is not a power of two. bo->base.size is overwritten because the default is the allocated power of two size, but we need the real size to compute the wasted size in amdgpu_bo_slab_destroy. entry_size is added to the hole in pb_slab_entry to hold the real entry size. Like other memory stats, no atomics are used. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/auxiliary/pipebuffer/pb_slab.h | 1 + src/gallium/drivers/radeon/radeon_winsys.h | 2 ++ src/gallium/drivers/radeonsi/si_query.c | 12 ++++++++++++ src/gallium/drivers/radeonsi/si_query.h | 2 ++ src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 22 +++++++++++++++++++++- src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 4 ++++ src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 2 ++ src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 1 + src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 2 ++ 9 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/pipebuffer/pb_slab.h b/src/gallium/auxiliary/pipebuffer/pb_slab.h index 78a4bf7..584e311 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_slab.h +++ b/src/gallium/auxiliary/pipebuffer/pb_slab.h @@ -62,6 +62,7 @@ struct pb_slab_entry struct list_head head; struct pb_slab *slab; /* the slab that contains this buffer */ unsigned group_index; /* index into pb_slabs::groups */ + unsigned entry_size; }; /* Descriptor of a slab from which many entries are carved out. diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index e3314f2..9f83c6a 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -119,6 +119,8 @@ enum radeon_value_id RADEON_REQUESTED_GTT_MEMORY, RADEON_MAPPED_VRAM, RADEON_MAPPED_GTT, + RADEON_SLAB_WASTED_VRAM, + RADEON_SLAB_WASTED_GTT, RADEON_BUFFER_WAIT_TIME_NS, RADEON_NUM_MAPPED_BUFFERS, RADEON_TIMESTAMP, diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index 007015d..4aa83fa 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -77,6 +77,10 @@ static enum radeon_value_id winsys_id_from_type(unsigned type) return RADEON_MAPPED_VRAM; case SI_QUERY_MAPPED_GTT: return RADEON_MAPPED_GTT; + case SI_QUERY_SLAB_WASTED_VRAM: + return RADEON_SLAB_WASTED_VRAM; + case SI_QUERY_SLAB_WASTED_GTT: + return RADEON_SLAB_WASTED_GTT; case SI_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS; case SI_QUERY_NUM_MAPPED_BUFFERS: @@ -173,6 +177,8 @@ static bool si_query_sw_begin(struct si_context *sctx, struct si_query *squery) case SI_QUERY_REQUESTED_GTT: case SI_QUERY_MAPPED_VRAM: case SI_QUERY_MAPPED_GTT: + case SI_QUERY_SLAB_WASTED_VRAM: + case SI_QUERY_SLAB_WASTED_GTT: case SI_QUERY_VRAM_USAGE: case SI_QUERY_VRAM_VIS_USAGE: case SI_QUERY_GTT_USAGE: @@ -339,6 +345,8 @@ static bool si_query_sw_end(struct si_context *sctx, struct si_query *squery) case SI_QUERY_REQUESTED_GTT: case SI_QUERY_MAPPED_VRAM: case SI_QUERY_MAPPED_GTT: + case SI_QUERY_SLAB_WASTED_VRAM: + case SI_QUERY_SLAB_WASTED_GTT: case SI_QUERY_VRAM_USAGE: case SI_QUERY_VRAM_VIS_USAGE: case SI_QUERY_GTT_USAGE: @@ -1691,6 +1699,8 @@ static struct pipe_driver_query_info si_driver_query_list[] = { X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE), X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE), X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE), + X("slab-wasted-VRAM", SLAB_WASTED_VRAM, BYTES, AVERAGE), + X("slab-wasted-GTT", SLAB_WASTED_GTT, BYTES, AVERAGE), X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE), X("num-mapped-buffers", NUM_MAPPED_BUFFERS, UINT64, AVERAGE), X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE), @@ -1804,11 +1814,13 @@ static int si_get_driver_query_info(struct pipe_screen *screen, unsigned index, case SI_QUERY_REQUESTED_VRAM: case SI_QUERY_VRAM_USAGE: case SI_QUERY_MAPPED_VRAM: + case SI_QUERY_SLAB_WASTED_VRAM: info->max_value.u64 = sscreen->info.vram_size; break; case SI_QUERY_REQUESTED_GTT: case SI_QUERY_GTT_USAGE: case SI_QUERY_MAPPED_GTT: + case SI_QUERY_SLAB_WASTED_GTT: info->max_value.u64 = sscreen->info.gart_size; break; case SI_QUERY_GPU_TEMPERATURE: diff --git a/src/gallium/drivers/radeonsi/si_query.h b/src/gallium/drivers/radeonsi/si_query.h index 48485c2..89e75ec 100644 --- a/src/gallium/drivers/radeonsi/si_query.h +++ b/src/gallium/drivers/radeonsi/si_query.h @@ -64,6 +64,8 @@ enum SI_QUERY_REQUESTED_GTT, SI_QUERY_MAPPED_VRAM, SI_QUERY_MAPPED_GTT, + SI_QUERY_SLAB_WASTED_VRAM, + SI_QUERY_SLAB_WASTED_GTT, SI_QUERY_BUFFER_WAIT_TIME, SI_QUERY_NUM_MAPPED_BUFFERS, SI_QUERY_NUM_GFX_IBS, diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 0ab3953..5108365 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -647,6 +647,15 @@ static struct pb_slabs *get_slabs(struct amdgpu_winsys *ws, uint64_t size, return NULL; } +static unsigned get_slab_wasted_size(struct amdgpu_winsys_bo *bo) +{ + assert(bo->base.size <= bo->u.slab.entry.entry_size); + assert(bo->base.size < bo->base.alignment || + bo->base.size < 1 << bo->ws->bo_slabs[0].min_order || + bo->base.size > bo->u.slab.entry.entry_size / 2); + return bo->u.slab.entry.entry_size - bo->base.size; +} + static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf) { struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); @@ -661,6 +670,11 @@ static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf) pb_slab_free(get_slabs(bo->ws, bo->base.size, 0), &bo->u.slab.entry); + + if (bo->base.placement & RADEON_DOMAIN_VRAM) + bo->ws->slab_wasted_vram -= get_slab_wasted_size(bo); + else + bo->ws->slab_wasted_gtt -= get_slab_wasted_size(bo); } static const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = { @@ -737,6 +751,7 @@ static struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, bo->unique_id = base_id + i; bo->u.slab.entry.slab = &slab->base; bo->u.slab.entry.group_index = group_index; + bo->u.slab.entry.entry_size = entry_size; if (slab->buffer->bo) { /* The slab is not suballocated. */ @@ -1331,8 +1346,13 @@ amdgpu_bo_create(struct amdgpu_winsys *ws, return NULL; bo = container_of(entry, struct amdgpu_winsys_bo, u.slab.entry); - pipe_reference_init(&bo->base.reference, 1); + bo->base.size = size; + + if (domain & RADEON_DOMAIN_VRAM) + ws->slab_wasted_vram += get_slab_wasted_size(bo); + else + ws->slab_wasted_gtt += get_slab_wasted_size(bo); return &bo->base; } diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index 6b87601..194f0da 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -223,6 +223,10 @@ static uint64_t amdgpu_query_value(struct radeon_winsys *rws, return ws->mapped_vram; case RADEON_MAPPED_GTT: return ws->mapped_gtt; + case RADEON_SLAB_WASTED_VRAM: + return ws->slab_wasted_vram; + case RADEON_SLAB_WASTED_GTT: + return ws->slab_wasted_gtt; case RADEON_BUFFER_WAIT_TIME_NS: return ws->buffer_wait_time; case RADEON_NUM_MAPPED_BUFFERS: diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h index 4c7407c..27c473a 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h @@ -66,6 +66,8 @@ struct amdgpu_winsys { uint64_t allocated_gtt; uint64_t mapped_vram; uint64_t mapped_gtt; + uint64_t slab_wasted_vram; + uint64_t slab_wasted_gtt; uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */ uint64_t num_gfx_IBs; uint64_t num_sdma_IBs; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 220008e..22cd399 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -812,6 +812,7 @@ struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap, bo->hash = base_hash + i; bo->u.slab.entry.slab = &slab->base; bo->u.slab.entry.group_index = group_index; + bo->u.slab.entry.entry_size = entry_size; bo->u.slab.real = slab->buffer; list_addtail(&bo->u.slab.entry.head, &slab->base.free); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 8f19494..8d25ff3 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -730,6 +730,8 @@ static uint64_t radeon_query_value(struct radeon_winsys *rws, case RADEON_VRAM_VIS_USAGE: case RADEON_GFX_BO_LIST_COUNTER: case RADEON_GFX_IB_SIZE_COUNTER: + case RADEON_SLAB_WASTED_VRAM: + case RADEON_SLAB_WASTED_GTT: return 0; /* unimplemented */ case RADEON_VRAM_USAGE: radeon_get_drm_value(ws->fd, RADEON_INFO_VRAM_USAGE, -- 2.7.4