From f815009036837cb28a349f74ab9614ead75b0e8a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 21 Oct 2021 22:57:43 -0400 Subject: [PATCH] gallium/radeon: change the BO priority definitions to bits This is for the next microoptimization. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/r600/r600_cs.h | 6 +-- src/gallium/drivers/r600/r600_pipe_common.h | 2 +- src/gallium/drivers/radeon/radeon_winsys.h | 62 ++++++++++++------------- src/gallium/drivers/radeonsi/si_debug.c | 65 +++++++++++++-------------- src/gallium/drivers/radeonsi/si_descriptors.c | 12 ++--- src/gallium/drivers/radeonsi/si_pipe.h | 4 +- src/gallium/drivers/radeonsi/si_state.h | 4 +- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 6 +-- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 9 ++-- 9 files changed, 83 insertions(+), 87 deletions(-) diff --git a/src/gallium/drivers/r600/r600_cs.h b/src/gallium/drivers/r600/r600_cs.h index 71e606b..b65fcdb 100644 --- a/src/gallium/drivers/r600/r600_cs.h +++ b/src/gallium/drivers/r600/r600_cs.h @@ -70,7 +70,7 @@ static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rct struct r600_ring *ring, struct r600_resource *rbo, enum radeon_bo_usage usage, - enum radeon_bo_priority priority) + unsigned priority) { assert(usage); return rctx->ws->cs_add_buffer( @@ -101,7 +101,7 @@ radeon_add_to_buffer_list_check_mem(struct r600_common_context *rctx, struct r600_ring *ring, struct r600_resource *rbo, enum radeon_bo_usage usage, - enum radeon_bo_priority priority, + unsigned priority, bool check_mem) { if (check_mem && @@ -116,7 +116,7 @@ radeon_add_to_buffer_list_check_mem(struct r600_common_context *rctx, static inline void r600_emit_reloc(struct r600_common_context *rctx, struct r600_ring *ring, struct r600_resource *rbo, enum radeon_bo_usage usage, - enum radeon_bo_priority priority) + unsigned priority) { struct radeon_cmdbuf *cs = &ring->cs; bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_has_virtual_memory; diff --git a/src/gallium/drivers/r600/r600_pipe_common.h b/src/gallium/drivers/r600/r600_pipe_common.h index 34293a4..aa7f631 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.h +++ b/src/gallium/drivers/r600/r600_pipe_common.h @@ -900,7 +900,7 @@ static inline unsigned r600_wavefront_size(enum radeon_family family) } } -static inline enum radeon_bo_priority +static inline unsigned r600_get_sampler_view_priority(struct r600_resource *res) { if (res->b.b.target == PIPE_BUFFER) diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 55c6b11..b234a66 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -142,53 +142,49 @@ enum radeon_value_id RADEON_CS_THREAD_TIME, }; -enum radeon_bo_priority -{ - /* Each group of two has the same priority. */ - RADEON_PRIO_FENCE = 0, - RADEON_PRIO_TRACE, +/* Each group of two has the same priority. */ +#define RADEON_PRIO_FENCE (1 << 0) +#define RADEON_PRIO_TRACE (1 << 1) - RADEON_PRIO_SO_FILLED_SIZE = 2, - RADEON_PRIO_QUERY, +#define RADEON_PRIO_SO_FILLED_SIZE (1 << 2) +#define RADEON_PRIO_QUERY (1 << 3) - RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */ - RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */ +#define RADEON_PRIO_IB1 (1 << 4) /* main IB submitted to the kernel */ +#define RADEON_PRIO_IB2 (1 << 5) /* IB executed with INDIRECT_BUFFER */ - RADEON_PRIO_DRAW_INDIRECT = 6, - RADEON_PRIO_INDEX_BUFFER, +#define RADEON_PRIO_DRAW_INDIRECT (1 << 6) +#define RADEON_PRIO_INDEX_BUFFER (1 << 7) - RADEON_PRIO_CP_DMA = 8, - RADEON_PRIO_BORDER_COLORS, +#define RADEON_PRIO_CP_DMA (1 << 8) +#define RADEON_PRIO_BORDER_COLORS (1 << 9) - RADEON_PRIO_CONST_BUFFER = 10, - RADEON_PRIO_DESCRIPTORS, +#define RADEON_PRIO_CONST_BUFFER (1 << 10) +#define RADEON_PRIO_DESCRIPTORS (1 << 11) - RADEON_PRIO_SAMPLER_BUFFER = 12, - RADEON_PRIO_VERTEX_BUFFER, +#define RADEON_PRIO_SAMPLER_BUFFER (1 << 12) +#define RADEON_PRIO_VERTEX_BUFFER (1 << 13) - RADEON_PRIO_SHADER_RW_BUFFER = 14, - RADEON_PRIO_COMPUTE_GLOBAL, +#define RADEON_PRIO_SHADER_RW_BUFFER (1 << 14) +#define RADEON_PRIO_COMPUTE_GLOBAL (1 << 15) - RADEON_PRIO_SAMPLER_TEXTURE = 16, - RADEON_PRIO_SHADER_RW_IMAGE, +#define RADEON_PRIO_SAMPLER_TEXTURE (1 << 16) +#define RADEON_PRIO_SHADER_RW_IMAGE (1 << 17) - RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 18, - RADEON_PRIO_COLOR_BUFFER, +#define RADEON_PRIO_SAMPLER_TEXTURE_MSAA (1 << 18) +#define RADEON_PRIO_COLOR_BUFFER (1 << 19) - RADEON_PRIO_DEPTH_BUFFER = 20, +#define RADEON_PRIO_DEPTH_BUFFER (1 << 20) - RADEON_PRIO_COLOR_BUFFER_MSAA = 22, +#define RADEON_PRIO_COLOR_BUFFER_MSAA (1 << 22) - RADEON_PRIO_DEPTH_BUFFER_MSAA = 24, +#define RADEON_PRIO_DEPTH_BUFFER_MSAA (1 << 24) - RADEON_PRIO_SEPARATE_META = 26, - RADEON_PRIO_SHADER_BINARY, /* the hw can't hide instruction cache misses */ +#define RADEON_PRIO_SEPARATE_META (1 << 26) +#define RADEON_PRIO_SHADER_BINARY (1 << 27) /* the hw can't hide instruction cache misses */ - RADEON_PRIO_SHADER_RINGS = 28, +#define RADEON_PRIO_SHADER_RINGS (1 << 28) - RADEON_PRIO_SCRATCH_BUFFER = 30, - /* 31 is the maximum value */ -}; +#define RADEON_PRIO_SCRATCH_BUFFER (1 << 30) struct winsys_handle; struct radeon_winsys_ctx; @@ -539,7 +535,7 @@ struct radeon_winsys { */ unsigned (*cs_add_buffer)(struct radeon_cmdbuf *cs, struct pb_buffer *buf, enum radeon_bo_usage usage, enum radeon_bo_domain domain, - enum radeon_bo_priority priority); + unsigned priority); /** * Return the index of an already-added buffer. diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 540206c..7f700f1 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -498,42 +498,39 @@ void si_log_hw_flush(struct si_context *sctx) } } -static const char *priority_to_string(enum radeon_bo_priority priority) +static const char *priority_to_string(unsigned priority) { -#define ITEM(x) [RADEON_PRIO_##x] = #x - static const char *table[64] = { - ITEM(FENCE), - ITEM(TRACE), - ITEM(SO_FILLED_SIZE), - ITEM(QUERY), - ITEM(IB1), - ITEM(IB2), - ITEM(DRAW_INDIRECT), - ITEM(INDEX_BUFFER), - ITEM(CP_DMA), - ITEM(CONST_BUFFER), - ITEM(DESCRIPTORS), - ITEM(BORDER_COLORS), - ITEM(SAMPLER_BUFFER), - ITEM(VERTEX_BUFFER), - ITEM(SHADER_RW_BUFFER), - ITEM(COMPUTE_GLOBAL), - ITEM(SAMPLER_TEXTURE), - ITEM(SHADER_RW_IMAGE), - ITEM(SAMPLER_TEXTURE_MSAA), - ITEM(COLOR_BUFFER), - ITEM(DEPTH_BUFFER), - ITEM(COLOR_BUFFER_MSAA), - ITEM(DEPTH_BUFFER_MSAA), - ITEM(SEPARATE_META), - ITEM(SHADER_BINARY), - ITEM(SHADER_RINGS), - ITEM(SCRATCH_BUFFER), - }; +#define ITEM(x) if (priority == RADEON_PRIO_##x) return #x + ITEM(FENCE); + ITEM(TRACE); + ITEM(SO_FILLED_SIZE); + ITEM(QUERY); + ITEM(IB1); + ITEM(IB2); + ITEM(DRAW_INDIRECT); + ITEM(INDEX_BUFFER); + ITEM(CP_DMA); + ITEM(CONST_BUFFER); + ITEM(DESCRIPTORS); + ITEM(BORDER_COLORS); + ITEM(SAMPLER_BUFFER); + ITEM(VERTEX_BUFFER); + ITEM(SHADER_RW_BUFFER); + ITEM(COMPUTE_GLOBAL); + ITEM(SAMPLER_TEXTURE); + ITEM(SHADER_RW_IMAGE); + ITEM(SAMPLER_TEXTURE_MSAA); + ITEM(COLOR_BUFFER); + ITEM(DEPTH_BUFFER); + ITEM(COLOR_BUFFER_MSAA); + ITEM(DEPTH_BUFFER_MSAA); + ITEM(SEPARATE_META); + ITEM(SHADER_BINARY); + ITEM(SHADER_RINGS); + ITEM(SCRATCH_BUFFER); #undef ITEM - assert(priority < ARRAY_SIZE(table)); - return table[priority]; + return ""; } static int bo_list_compare_va(const struct radeon_bo_list_item *a, @@ -582,7 +579,7 @@ static void si_dump_bo_list(struct si_context *sctx, const struct radeon_saved_c if (!(saved->bo_list[i].priority_usage & (1u << j))) continue; - fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j)); + fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(1u << j)); hit = true; } fprintf(f, "\n"); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 89c09db..7699ba6 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -188,7 +188,7 @@ si_add_descriptors_to_bo_list(struct si_context *sctx, struct si_descriptors *de /* SAMPLER VIEWS */ -static inline enum radeon_bo_priority si_get_sampler_view_priority(struct si_resource *res) +static inline unsigned si_get_sampler_view_priority(struct si_resource *res) { if (res->b.b.target == PIPE_BUFFER) return RADEON_PRIO_SAMPLER_BUFFER; @@ -219,7 +219,7 @@ static void si_sampler_view_add_buffer(struct si_context *sctx, struct pipe_reso bool check_mem) { struct si_texture *tex = (struct si_texture *)resource; - enum radeon_bo_priority priority; + unsigned priority; if (!resource) return; @@ -1027,8 +1027,8 @@ static void si_init_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, struct si_descriptors *descs, unsigned num_buffers, short shader_userdata_rel_index, - enum radeon_bo_priority priority, - enum radeon_bo_priority priority_constbuf) + unsigned priority, + unsigned priority_constbuf) { buffers->priority = priority; buffers->priority_constbuf = priority_constbuf; @@ -1331,7 +1331,7 @@ void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, uint slot static void si_set_shader_buffer(struct si_context *sctx, struct si_buffer_resources *buffers, unsigned descriptors_idx, uint slot, const struct pipe_shader_buffer *sbuffer, bool writable, - enum radeon_bo_priority priority) + unsigned priority) { struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; uint32_t *desc = descs->list + slot * 4; @@ -1608,7 +1608,7 @@ void si_update_needs_color_decompress_masks(struct si_context *sctx) */ static bool si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, unsigned descriptors_idx, uint64_t slot_mask, - struct pipe_resource *buf, enum radeon_bo_priority priority) + struct pipe_resource *buf, unsigned priority) { struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; bool noop = true; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index a128983..582720b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1941,7 +1941,7 @@ static inline void si_need_gfx_cs_space(struct si_context *ctx, unsigned num_dra */ static inline void radeon_add_to_buffer_list(struct si_context *sctx, struct radeon_cmdbuf *cs, struct si_resource *bo, enum radeon_bo_usage usage, - enum radeon_bo_priority priority) + unsigned priority) { assert(usage); sctx->ws->cs_add_buffer(cs, bo->buf, (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED), @@ -1966,7 +1966,7 @@ static inline void radeon_add_to_buffer_list(struct si_context *sctx, struct rad static inline void radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sctx, struct si_resource *bo, enum radeon_bo_usage usage, - enum radeon_bo_priority priority, + unsigned priority, bool check_mem) { if (check_mem && diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index af2f750..29965f8 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -454,8 +454,8 @@ struct si_buffer_resources { struct pipe_resource **buffers; /* this has num_buffers elements */ unsigned *offsets; /* this has num_buffers elements */ - enum radeon_bo_priority priority : 6; - enum radeon_bo_priority priority_constbuf : 6; + unsigned priority; + unsigned priority_constbuf; /* The i-th bit is set if that element is enabled (non-NULL resource). */ uint64_t enabled_mask; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 141f6f6..ac27cce 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -645,7 +645,7 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs, struct pb_buffer *buf, enum radeon_bo_usage usage, enum radeon_bo_domain domains, - enum radeon_bo_priority priority) + unsigned priority) { /* Don't use the "domains" parameter. Amdgpu doesn't support changing * the buffer placement during command submission. @@ -662,7 +662,7 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs, */ if (bo == cs->last_added_bo && (usage & cs->last_added_bo_usage) == usage && - (1u << priority) & cs->last_added_bo_priority_usage) + priority & cs->last_added_bo_priority_usage) return cs->last_added_bo_index; if (!(bo->base.usage & RADEON_FLAG_SPARSE)) { @@ -691,7 +691,7 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs, buffer = &cs->sparse_buffers[index]; } - buffer->u.real.priority_usage |= 1u << priority; + buffer->u.real.priority_usage |= priority; buffer->usage |= usage; cs->last_added_bo = bo; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 0e37d1c..59ce65f 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -362,7 +362,7 @@ static unsigned radeon_drm_cs_add_buffer(struct radeon_cmdbuf *rcs, struct pb_buffer *buf, enum radeon_bo_usage usage, enum radeon_bo_domain domains, - enum radeon_bo_priority priority) + unsigned priority) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)buf; @@ -394,8 +394,11 @@ static unsigned radeon_drm_cs_add_buffer(struct radeon_cmdbuf *rcs, added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain); reloc->read_domains |= rd; reloc->write_domain |= wd; - reloc->flags = MAX2(reloc->flags, priority); - cs->csc->relocs_bo[index].u.real.priority_usage |= 1u << priority; + + /* The priority must be in [0, 15]. It's used by the kernel memory management. */ + unsigned bo_priority = util_last_bit(priority) / 2; + reloc->flags = MAX2(reloc->flags, bo_priority); + cs->csc->relocs_bo[index].u.real.priority_usage |= priority; if (added_domains & RADEON_DOMAIN_VRAM) rcs->used_vram_kb += bo->base.size / 1024; -- 2.7.4