From d39fd98a4785d68e7cff8f2e8caf11a3575288b5 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Wed, 17 May 2023 16:06:15 +0800 Subject: [PATCH] ac/llvm,radeonsi: lower nir_load_ring_esgs_amd in abi MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit It's not implemented by aco. So move the code from llvm to nir. Reviewed-by: Marek Olšák Signed-off-by: Qiang Yu Part-of: --- src/amd/llvm/ac_nir_to_llvm.c | 1 - src/gallium/drivers/radeonsi/si_nir_lower_abi.c | 46 +++++++++++++++ src/gallium/drivers/radeonsi/si_shader_internal.h | 3 - src/gallium/drivers/radeonsi/si_shader_llvm.c | 71 ++++++++++++----------- src/gallium/drivers/radeonsi/si_shader_llvm_gs.c | 41 ------------- 5 files changed, 83 insertions(+), 79 deletions(-) diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 00ae400..4c2000f 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -3201,7 +3201,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins case nir_intrinsic_load_first_vertex: case nir_intrinsic_load_tess_rel_patch_id_amd: case nir_intrinsic_load_ring_tess_offchip_amd: - case nir_intrinsic_load_ring_esgs_amd: case nir_intrinsic_load_ring_attr_amd: case nir_intrinsic_load_ring_gsvs_amd: case nir_intrinsic_load_lds_ngg_scratch_base_amd: diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index 3a0ce9c..d74a8d0 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -16,6 +16,8 @@ struct lower_abi_state { struct si_shader *shader; struct si_shader_args *args; + + nir_ssa_def *esgs_ring; }; #define GET_FIELD_NIR(field) \ @@ -191,6 +193,44 @@ static nir_ssa_def *build_tess_factor_ring_desc(nir_builder *b, struct si_screen return nir_vec(b, comp, 4); } +static nir_ssa_def *build_esgs_ring_desc(nir_builder *b, enum amd_gfx_level gfx_level, + struct si_shader_args *args) +{ + nir_ssa_def *desc = si_nir_load_internal_binding(b, args, SI_RING_ESGS, 4); + + if (b->shader->info.stage == MESA_SHADER_GEOMETRY) + return desc; + + nir_ssa_def *vec[4]; + for (int i = 0; i < 4; i++) + vec[i] = nir_channel(b, desc, i); + + vec[1] = nir_ior_imm(b, vec[1], S_008F04_SWIZZLE_ENABLE_GFX6(1)); + vec[3] = nir_ior_imm(b, vec[3], + S_008F0C_ELEMENT_SIZE(1) | + S_008F0C_INDEX_STRIDE(3) | + S_008F0C_ADD_TID_ENABLE(1)); + + /* If MUBUF && ADD_TID_ENABLE, DATA_FORMAT means STRIDE[14:17] on gfx8-9, so set 0. */ + if (gfx_level == GFX8) + vec[3] = nir_iand_imm(b, vec[3], C_008F0C_DATA_FORMAT); + + return nir_vec(b, vec, 4); +} + +static void preload_reusable_variables(nir_builder *b, struct lower_abi_state *s) +{ + const struct si_shader_selector *sel = s->shader->selector; + const union si_shader_key *key = &s->shader->key; + + b->cursor = nir_before_cf_list(&b->impl->body); + + if (sel->screen->info.gfx_level <= GFX8 && sel->stage <= MESA_SHADER_GEOMETRY && + (key->ge.as_es || sel->stage == MESA_SHADER_GEOMETRY)) { + s->esgs_ring = build_esgs_ring_desc(b, sel->screen->info.gfx_level, s->args); + } +} + static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_state *s) { nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); @@ -540,6 +580,10 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s case nir_intrinsic_load_ordered_id_amd: replacement = ac_nir_unpack_arg(b, &args->ac, args->ac.gs_tg_info, 0, 12); break; + case nir_intrinsic_load_ring_esgs_amd: + assert(s->esgs_ring); + replacement = s->esgs_ring; + break; default: return false; } @@ -610,6 +654,8 @@ bool si_nir_lower_abi(nir_shader *nir, struct si_shader *shader, struct si_shade nir_builder b; nir_builder_init(&b, impl); + preload_reusable_variables(&b, &state); + bool progress = false; nir_foreach_block_safe(block, impl) { nir_foreach_instr_safe(instr, block) { diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index cfed7c3..28f7e16 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -97,7 +97,6 @@ struct si_shader_context { struct ac_llvm_compiler *compiler; /* Preloaded descriptors. */ - LLVMValueRef esgs_ring; LLVMValueRef gsvs_ring[4]; LLVMValueRef tess_offchip_ring; LLVMValueRef instance_divisor_constbuf; @@ -181,7 +180,6 @@ LLVMValueRef si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueR LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret, struct ac_arg param, unsigned return_index); LLVMValueRef si_prolog_get_internal_bindings(struct si_shader_context *ctx); -void si_llvm_declare_esgs_ring(struct si_shader_context *ctx); LLVMValueRef si_unpack_param(struct si_shader_context *ctx, struct ac_arg param, unsigned rshift, unsigned bitwidth); void si_build_wrapper_function(struct si_shader_context *ctx, struct ac_llvm_pointer *parts, @@ -197,7 +195,6 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * LLVMValueRef si_is_es_thread(struct si_shader_context *ctx); LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx); void si_llvm_es_build_end(struct si_shader_context *ctx); -void si_preload_esgs_ring(struct si_shader_context *ctx); void si_preload_gs_rings(struct si_shader_context *ctx); void si_llvm_gs_build_end(struct si_shader_context *ctx); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 1860fc4..1c5a11d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -312,17 +312,21 @@ LLVMValueRef si_prolog_get_internal_bindings(struct si_shader_context *ctx) * We declare it with 64KB alignment as a hint that the * pointer value will always be 0. */ -void si_llvm_declare_esgs_ring(struct si_shader_context *ctx) +static void si_llvm_declare_lds_esgs_ring(struct si_shader_context *ctx) { - if (ctx->esgs_ring) + if (ctx->ac.lds.value) return; assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring")); - ctx->esgs_ring = LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0), - "esgs_ring", AC_ADDR_SPACE_LDS); - LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage); - LLVMSetAlignment(ctx->esgs_ring, 64 * 1024); + LLVMValueRef esgs_ring = + LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0), + "esgs_ring", AC_ADDR_SPACE_LDS); + LLVMSetLinkage(esgs_ring, LLVMExternalLinkage); + LLVMSetAlignment(esgs_ring, 64 * 1024); + + ctx->ac.lds.value = esgs_ring; + ctx->ac.lds.pointee_type = ctx->ac.i32; } static void si_init_exec_from_input(struct si_shader_context *ctx, struct ac_arg param, @@ -674,9 +678,6 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin case nir_intrinsic_load_tess_rel_patch_id_amd: return si_get_rel_patch_id(ctx); - case nir_intrinsic_load_ring_esgs_amd: - return ctx->esgs_ring; - case nir_intrinsic_load_ring_gsvs_amd: return ctx->gsvs_ring[nir_intrinsic_stream_id(intrin)]; @@ -756,10 +757,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade si_llvm_create_main_func(ctx); - if (ctx->stage <= MESA_SHADER_GEOMETRY && - (ctx->shader->key.ge.as_es || ctx->stage == MESA_SHADER_GEOMETRY)) - si_preload_esgs_ring(ctx); - switch (ctx->stage) { case MESA_SHADER_VERTEX: /* preload instance_divisor_constbuf to be used for input load after culling */ @@ -833,28 +830,34 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade break; } - if ((ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL) && - shader->key.ge.as_ngg && !shader->key.ge.as_es) { - /* Unconditionally declare scratch space base for streamout and - * vertex compaction. Whether space is actually allocated is - * determined during linking / PM4 creation. - */ - si_llvm_declare_esgs_ring(ctx); - ctx->ac.lds.value = ctx->esgs_ring; - ctx->ac.lds.pointee_type = ctx->ac.i32; + bool is_merged_esgs_stage = + ctx->screen->info.gfx_level >= GFX9 && ctx->stage <= MESA_SHADER_GEOMETRY && + (ctx->shader->key.ge.as_es || ctx->stage == MESA_SHADER_GEOMETRY); - /* This is really only needed when streamout and / or vertex - * compaction is enabled. - */ - if (si_shader_uses_streamout(shader) || shader->key.ge.opt.ngg_culling) { - LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader)); - ctx->gs_ngg_scratch = (struct ac_llvm_pointer) { - .value = LLVMAddGlobalInAddressSpace(ctx->ac.module, asi32, "ngg_scratch", AC_ADDR_SPACE_LDS), - .pointee_type = asi32 - }; - LLVMSetInitializer(ctx->gs_ngg_scratch.value, LLVMGetUndef(asi32)); - LLVMSetAlignment(ctx->gs_ngg_scratch.value, 8); - } + bool is_nogs_ngg_stage = + (ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL) && + shader->key.ge.as_ngg && !shader->key.ge.as_es; + + /* Declare the ESGS ring as an explicit LDS symbol. + * When NGG VS/TES, unconditionally declare for streamout and vertex compaction. + * Whether space is actually allocated is determined during linking / PM4 creation. + */ + if (is_merged_esgs_stage || is_nogs_ngg_stage) + si_llvm_declare_lds_esgs_ring(ctx); + + /* This is really only needed when streamout and / or vertex + * compaction is enabled. + */ + if (is_nogs_ngg_stage && + (si_shader_uses_streamout(shader) || shader->key.ge.opt.ngg_culling)) { + LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader)); + ctx->gs_ngg_scratch = (struct ac_llvm_pointer) { + .value = LLVMAddGlobalInAddressSpace(ctx->ac.module, asi32, "ngg_scratch", + AC_ADDR_SPACE_LDS), + .pointee_type = asi32 + }; + LLVMSetInitializer(ctx->gs_ngg_scratch.value, LLVMGetUndef(asi32)); + LLVMSetAlignment(ctx->gs_ngg_scratch.value, 8); } /* For merged shaders (VS-TCS, VS-GS, TES-GS): */ diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index 058eb73..2dd04d4 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -76,47 +76,6 @@ void si_llvm_gs_build_end(struct si_shader_context *ctx) ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label); } -void si_preload_esgs_ring(struct si_shader_context *ctx) -{ - LLVMBuilderRef builder = ctx->ac.builder; - - if (ctx->screen->info.gfx_level <= GFX8) { - LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, SI_RING_ESGS, 0); - - ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, - ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings), offset); - - if (ctx->stage != MESA_SHADER_GEOMETRY) { - LLVMValueRef desc1 = LLVMBuildExtractElement(builder, ctx->esgs_ring, ctx->ac.i32_1, ""); - LLVMValueRef desc3 = LLVMBuildExtractElement(builder, ctx->esgs_ring, - LLVMConstInt(ctx->ac.i32, 3, 0), ""); - desc1 = LLVMBuildOr(builder, desc1, LLVMConstInt(ctx->ac.i32, - S_008F04_SWIZZLE_ENABLE_GFX6(1), 0), ""); - desc3 = LLVMBuildOr(builder, desc3, LLVMConstInt(ctx->ac.i32, - S_008F0C_ELEMENT_SIZE(1) | - S_008F0C_INDEX_STRIDE(3) | - S_008F0C_ADD_TID_ENABLE(1), 0), ""); - - /* If MUBUF && ADD_TID_ENABLE, DATA_FORMAT means STRIDE[14:17] on gfx8-9, so set 0. */ - if (ctx->screen->info.gfx_level == GFX8) { - desc3 = LLVMBuildAnd(builder, desc3, - LLVMConstInt(ctx->ac.i32, C_008F0C_DATA_FORMAT, 0), ""); - } - - ctx->esgs_ring = LLVMBuildInsertElement(builder, ctx->esgs_ring, desc1, ctx->ac.i32_1, ""); - ctx->esgs_ring = LLVMBuildInsertElement(builder, ctx->esgs_ring, desc3, - LLVMConstInt(ctx->ac.i32, 3, 0), ""); - } - } else { - /* Declare the ESGS ring as an explicit LDS symbol. */ - si_llvm_declare_esgs_ring(ctx); - ctx->ac.lds = (struct ac_llvm_pointer) { - .value = ctx->esgs_ring, - .pointee_type = LLVMArrayType(ctx->ac.i32, 0), - }; - } -} - void si_preload_gs_rings(struct si_shader_context *ctx) { if (ctx->ac.gfx_level >= GFX11) -- 2.7.4