From: Qiang Yu Date: Tue, 11 Oct 2022 08:29:44 +0000 (+0800) Subject: ac/nir/ngg,ac/llvm,aco: save nogs ngg culling one lds dword X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=13fb7f8f2cc2485e9daf60e408c93efd31217e9e;p=platform%2Fupstream%2Fmesa.git ac/nir/ngg,ac/llvm,aco: save nogs ngg culling one lds dword TES rel patch id is <256, so we can use an existing unused LDS byte instead of an extra dword. To ease programming, change the index of repacked_arg_vars for these variables. Reviewed-by: Marek Olšák Signed-off-by: Qiang Yu Part-of: --- diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index 167db84..bd6e308 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -212,6 +212,8 @@ enum { lds_es_exporter_tid = 17, /* bit i is set when the i'th clip distance of a vertex is negative */ lds_es_clipdist_neg_mask = 18, + /* TES only, relative patch ID, less than max workgroup size */ + lds_es_tes_rel_patch_id = 19, /* Repacked arguments - also listed separately for VS and TES */ lds_es_arg_0 = 20, @@ -223,8 +225,7 @@ enum { /* TES arguments which need to be repacked */ lds_es_tes_u = 20, lds_es_tes_v = 24, - lds_es_tes_rel_patch_id = 28, - lds_es_tes_patch_id = 32, + lds_es_tes_patch_id = 28, }; typedef struct { @@ -872,9 +873,9 @@ cleanup_culling_shader_after_dce(nir_shader *shader, if (!uses_tes_v) progress |= remove_compacted_arg(state, &b, 1); if (!uses_tes_rel_patch_id) - progress |= remove_compacted_arg(state, &b, 2); - if (!uses_tes_patch_id) progress |= remove_compacted_arg(state, &b, 3); + if (!uses_tes_patch_id) + progress |= remove_compacted_arg(state, &b, 2); } return progress; @@ -925,6 +926,16 @@ compact_vertices_after_culling(nir_builder *b, nogs_state->compact_arg_stores[i] = &store->instr; } + + /* TES rel patch id does not cost extra dword */ + if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) { + nir_ssa_def *arg_val = nir_load_var(b, repacked_arg_vars[3]); 
nir_intrinsic_instr *store = + nir_store_shared(b, nir_u2u8(b, arg_val), exporter_addr, + .base = lds_es_tes_rel_patch_id); + + nogs_state->compact_arg_stores[3] = &store->instr; + } } nir_pop_if(b, if_es_accepted); @@ -947,6 +958,12 @@ compact_vertices_after_culling(nir_builder *b, nir_ssa_def *arg_val = nir_load_shared(b, 1, 32, es_vertex_lds_addr, .base = lds_es_arg_0 + 4u * i); nir_store_var(b, repacked_arg_vars[i], arg_val, 0x1u); } + + if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) { + nir_ssa_def *arg_val = nir_load_shared(b, 1, 8, es_vertex_lds_addr, + .base = lds_es_tes_rel_patch_id); + nir_store_var(b, repacked_arg_vars[3], nir_u2u32(b, arg_val), 0x1u); + } } nir_push_else(b, if_packed_es_thread); { @@ -1289,7 +1306,7 @@ ngg_nogs_get_culling_pervertex_lds_size(gl_shader_stage stage, max_args = uses_instance_id ? 2 : 1; } else { assert(stage == MESA_SHADER_TESS_EVAL); - max_args = uses_primitive_id ? 4 : 3; + max_args = uses_primitive_id ? 3 : 2; } if (max_exported_args) @@ -1369,9 +1386,9 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c nir_ssa_def *tess_coord = nir_load_tess_coord(b); nir_store_var(b, repacked_arg_vars[0], nir_channel(b, tess_coord, 0), 0x1u); nir_store_var(b, repacked_arg_vars[1], nir_channel(b, tess_coord, 1), 0x1u); - nir_store_var(b, repacked_arg_vars[2], nir_load_tess_rel_patch_id_amd(b), 0x1u); + nir_store_var(b, repacked_arg_vars[3], nir_load_tess_rel_patch_id_amd(b), 0x1u); if (uses_tess_primitive_id) - nir_store_var(b, repacked_arg_vars[3], nir_load_primitive_id(b), 0x1u); + nir_store_var(b, repacked_arg_vars[2], nir_load_primitive_id(b), 0x1u); } else { unreachable("Should be VS or TES."); } diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 88b686b..eb8f9dc 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -9176,8 +9176,8 @@ visit_intrinsic(isel_context* ctx, 
nir_intrinsic_instr* instr) ctx->arg_temps[ctx->args->ac.tes_u.arg_index] = get_ssa_temp(ctx, instr->src[0].ssa); ctx->arg_temps[ctx->args->ac.tes_v.arg_index] = get_ssa_temp(ctx, instr->src[1].ssa); ctx->arg_temps[ctx->args->ac.tes_rel_patch_id.arg_index] = - get_ssa_temp(ctx, instr->src[2].ssa); - ctx->arg_temps[ctx->args->ac.tes_patch_id.arg_index] = get_ssa_temp(ctx, instr->src[3].ssa); + get_ssa_temp(ctx, instr->src[3].ssa); + ctx->arg_temps[ctx->args->ac.tes_patch_id.arg_index] = get_ssa_temp(ctx, instr->src[2].ssa); break; } case nir_intrinsic_load_force_vrs_rates_amd: { diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 7e39440..69dc14c 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -4312,8 +4312,8 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins case nir_intrinsic_overwrite_tes_arguments_amd: ctx->abi->tes_u_replaced = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0])); ctx->abi->tes_v_replaced = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1])); - ctx->abi->tes_rel_patch_id_replaced = get_src(ctx, instr->src[2]); - ctx->abi->tes_patch_id_replaced = get_src(ctx, instr->src[3]); + ctx->abi->tes_rel_patch_id_replaced = get_src(ctx, instr->src[3]); + ctx->abi->tes_patch_id_replaced = get_src(ctx, instr->src[2]); break; case nir_intrinsic_export_primitive_amd: { struct ac_ngg_prim prim = {0};