From 1001478a6811d1e769de7b78a3239466209fda6a Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Thu, 20 Jul 2023 16:53:55 +0800 Subject: [PATCH] radeonsi: support upload multi part shader binary MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Need to split shader binary into exec and data part, then combine exec and data of all shader parts separately. So const data symbols in code need to be relocated. Reviewed-by: Marek Olšák Signed-off-by: Qiang Yu Part-of: --- src/gallium/drivers/radeonsi/si_shader.c | 69 +++++++++++++++++-- src/gallium/drivers/radeonsi/si_shader_aco.c | 10 ++- .../drivers/radeonsi/si_shader_internal.h | 4 +- 3 files changed, 73 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index e624f596173..f93dad3acc1 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -830,6 +830,24 @@ bool si_shader_binary_open(struct si_screen *screen, struct si_shader *shader, return ok; } +static unsigned get_shader_binaries(struct si_shader *shader, struct si_shader_binary *bin[4]) +{ + unsigned num_bin = 0; + + if (shader->prolog) + bin[num_bin++] = &shader->prolog->binary; + + if (shader->previous_stage) + bin[num_bin++] = &shader->previous_stage->binary; + + bin[num_bin++] = &shader->binary; + + if (shader->epilog) + bin[num_bin++] = &shader->epilog->binary; + + return num_bin; +} + static unsigned si_get_shader_binary_size(struct si_screen *screen, struct si_shader *shader) { if (shader->binary.type == SI_SHADER_BINARY_ELF) { @@ -839,8 +857,15 @@ static unsigned si_get_shader_binary_size(struct si_screen *screen, struct si_sh ac_rtld_close(&rtld); return size; } else { - assert(shader->binary.type == SI_SHADER_BINARY_RAW); - return shader->binary.code_size; + struct si_shader_binary *bin[4]; + unsigned num_bin = get_shader_binaries(shader, bin); + + unsigned size = 0; + for (unsigned i = 0; i < num_bin; i++) { + assert(bin[i]->type == SI_SHADER_BINARY_RAW); + size += bin[i]->exec_size; + } + return size; } } @@ -942,8 +967,17 @@ static void calculate_needed_lds_size(struct si_screen *sscreen, struct si_shade static bool upload_binary_raw(struct si_screen *sscreen, struct si_shader *shader, uint64_t scratch_va) { - unsigned rx_size = - ac_align_shader_binary_for_prefetch(&sscreen->info, shader->binary.code_size); + struct si_shader_binary *bin[4]; + unsigned num_bin = get_shader_binaries(shader, bin); + + unsigned code_size = 0, exec_size = 0; + for (unsigned i = 0; i < num_bin; i++) { + assert(bin[i]->type == SI_SHADER_BINARY_RAW); + code_size += bin[i]->code_size; + exec_size += bin[i]->exec_size; + } + + unsigned rx_size = ac_align_shader_binary_for_prefetch(&sscreen->info, code_size); si_resource_reference(&shader->bo, NULL); shader->bo = @@ -965,9 +999,32 @@ static bool upload_binary_raw(struct si_screen *sscreen, struct si_shader *shade if (!rx_ptr) return false; - memcpy(rx_ptr, shader->binary.code_buffer, shader->binary.code_size); + unsigned exec_offset = 0, data_offset = exec_size; + for (unsigned i = 0; i < num_bin; i++) { + memcpy(rx_ptr + exec_offset, bin[i]->code_buffer, bin[i]->exec_size); + + if (bin[i]->num_symbols) { + /* Offset needed to add to const data symbol because of inserting other + * shader part between exec code and const data. + */ + unsigned const_offset = data_offset - exec_offset - bin[i]->exec_size; + + /* Prolog and epilog have no symbols. */ + struct si_shader *sh = bin[i] == &shader->binary ? shader : shader->previous_stage; + assert(sh && bin[i] == &sh->binary); - si_aco_resolve_symbols(shader, rx_ptr, scratch_va); + si_aco_resolve_symbols(sh, rx_ptr + exec_offset, (const uint32_t *)bin[i]->code_buffer, + scratch_va, const_offset); + } + + exec_offset += bin[i]->exec_size; + + unsigned data_size = bin[i]->code_size - bin[i]->exec_size; + if (data_size) { + memcpy(rx_ptr + data_offset, bin[i]->code_buffer + bin[i]->exec_size, data_size); + data_offset += data_size; + } + } sscreen->ws->buffer_unmap(sscreen->ws, shader->bo->buf); shader->gpu_address = shader->bo->gpu_address; diff --git a/src/gallium/drivers/radeonsi/si_shader_aco.c b/src/gallium/drivers/radeonsi/si_shader_aco.c index 4e1f371e7fa..62b300968f0 100644 --- a/src/gallium/drivers/radeonsi/si_shader_aco.c +++ b/src/gallium/drivers/radeonsi/si_shader_aco.c @@ -177,7 +177,8 @@ si_aco_compile_shader(struct si_shader *shader, } void -si_aco_resolve_symbols(struct si_shader *shader, uint32_t *code, uint64_t scratch_va) +si_aco_resolve_symbols(struct si_shader *shader, uint32_t *code_for_write, + const uint32_t *code_for_read, uint64_t scratch_va, uint32_t const_offset) { const struct aco_symbol *symbols = (struct aco_symbol *)shader->binary.symbols; const struct si_shader_selector *sel = shader->selector; @@ -210,12 +211,15 @@ si_aco_resolve_symbols(struct si_shader *shader, uint32_t *code, uint64_t scratc value = shader->gs_info.esgs_ring_size * 4; break; case aco_symbol_const_data_addr: - continue; + if (!const_offset) + continue; + value = code_for_read[symbols[i].offset] + const_offset; + break; default: unreachable("invalid aco symbol"); break; } - code[symbols[i].offset] = value; + code_for_write[symbols[i].offset] = value; } } diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index ebf3b5c14e0..d5e00987a8f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -215,6 +215,8 @@ bool si_aco_compile_shader(struct si_shader *shader, struct si_shader_args *args, struct nir_shader *nir, struct util_debug_callback *debug); -void si_aco_resolve_symbols(struct si_shader *shader, uint32_t *code, uint64_t scratch_va); +void si_aco_resolve_symbols(struct si_shader *shader, uint32_t *code_for_write, + const uint32_t *code_for_read, uint64_t scratch_va, + uint32_t const_offset); #endif -- 2.34.1