From a98c9ba5809bdd5a31e30caab41984d127966d51 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 7 Apr 2017 21:41:10 +0200 Subject: [PATCH] radeonsi/gfx9: add si_shader::previous_stage for merged shaders MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_debug.c | 4 ++++ src/gallium/drivers/radeonsi/si_shader.c | 35 +++++++++++++++++++++++++++++++- src/gallium/drivers/radeonsi/si_shader.h | 1 + 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 9d0c0c5..038c8b4 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -639,6 +639,10 @@ static void si_print_annotated_shader(struct si_shader *shader, si_add_split_disasm(shader->prolog->binary.disasm_string, start_addr, &num_inst, instructions); } + if (shader->previous_stage) { + si_add_split_disasm(shader->previous_stage->binary.disasm_string, + start_addr, &num_inst, instructions); + } si_add_split_disasm(shader->binary.disasm_string, start_addr, &num_inst, instructions); if (shader->epilog) { diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 446c811..9640d8a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -6149,6 +6149,8 @@ static unsigned si_get_shader_binary_size(struct si_shader *shader) if (shader->prolog) size += shader->prolog->binary.code_size; + if (shader->previous_stage) + size += shader->previous_stage->binary.code_size; if (shader->epilog) size += shader->epilog->binary.code_size; return size; @@ -6158,6 +6160,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) { const struct ac_shader_binary *prolog = shader->prolog ? &shader->prolog->binary : NULL; + const struct ac_shader_binary *previous_stage = + shader->previous_stage ? &shader->previous_stage->binary : NULL; const struct ac_shader_binary *epilog = shader->epilog ? &shader->epilog->binary : NULL; const struct ac_shader_binary *mainb = &shader->binary; @@ -6166,7 +6170,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) unsigned char *ptr; assert(!prolog || !prolog->rodata_size); - assert((!prolog && !epilog) || !mainb->rodata_size); + assert(!previous_stage || !previous_stage->rodata_size); + assert((!prolog && !previous_stage && !epilog) || !mainb->rodata_size); assert(!epilog || !epilog->rodata_size); /* GFX9 can fetch at most 128 bytes past the end of the shader. @@ -6192,6 +6197,11 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) util_memcpy_cpu_to_le32(ptr, prolog->code, prolog->code_size); ptr += prolog->code_size; } + if (previous_stage) { + util_memcpy_cpu_to_le32(ptr, previous_stage->code, + previous_stage->code_size); + ptr += previous_stage->code_size; + } util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size); ptr += mainb->code_size; @@ -6399,6 +6409,9 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, if (shader->prolog) si_shader_dump_disassembly(&shader->prolog->binary, debug, "prolog", file); + if (shader->previous_stage) + si_shader_dump_disassembly(&shader->previous_stage->binary, + debug, "previous stage", file); si_shader_dump_disassembly(&shader->binary, debug, "main", file); @@ -8573,6 +8586,26 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, shader->config.num_vgprs = MAX2(shader->config.num_vgprs, shader->prolog->config.num_vgprs); } + if (shader->previous_stage) { + shader->config.num_sgprs = MAX2(shader->config.num_sgprs, + shader->previous_stage->config.num_sgprs); + shader->config.num_vgprs = MAX2(shader->config.num_vgprs, + shader->previous_stage->config.num_vgprs); + shader->config.spilled_sgprs = + MAX2(shader->config.spilled_sgprs, + shader->previous_stage->config.spilled_sgprs); + shader->config.spilled_vgprs = + MAX2(shader->config.spilled_vgprs, + shader->previous_stage->config.spilled_vgprs); + shader->config.private_mem_vgprs = + MAX2(shader->config.private_mem_vgprs, + shader->previous_stage->config.private_mem_vgprs); + shader->config.scratch_bytes_per_wave = + MAX2(shader->config.scratch_bytes_per_wave, + shader->previous_stage->config.scratch_bytes_per_wave); + shader->info.uses_instanceid |= + shader->previous_stage->info.uses_instanceid; + } if (shader->epilog) { shader->config.num_sgprs = MAX2(shader->config.num_sgprs, shader->epilog->config.num_sgprs); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 0673c6c..65da654 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -505,6 +505,7 @@ struct si_shader { struct si_shader *next_variant; struct si_shader_part *prolog; + struct si_shader *previous_stage; /* for GFX9 */ struct si_shader_part *epilog; struct si_pm4_state *pm4; -- 2.7.4