radv: Refactor calculate_tess_lds_size and get_tcs_num_patches.
author Timur Kristóf <timur.kristof@gmail.com>
Mon, 30 Mar 2020 14:04:53 +0000 (16:04 +0200)
committer Marge Bot <eric+marge@anholt.net>
Wed, 29 Apr 2020 11:51:04 +0000 (11:51 +0000)
Previously, these functions took the bit masks of the TCS outputs
and patch outputs written, and derived the number of outputs
from them.

Now, they take the number of outputs and patch outputs instead.
This will allow the backend compiler to better optimize the
LDS layout.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4388>

src/amd/compiler/aco_instruction_selection_setup.cpp
src/amd/vulkan/radv_nir_to_llvm.c
src/amd/vulkan/radv_shader.h

index 8028031..bf9e96e 100644 (file)
@@ -105,6 +105,8 @@ struct isel_context {
    unsigned tcs_tess_lvl_in_loc;
    uint64_t tcs_temp_only_inputs;
    uint32_t tcs_num_inputs;
+   uint32_t tcs_num_outputs;
+   uint32_t tcs_num_patch_outputs;
    uint32_t tcs_num_patches;
    bool tcs_in_out_eq = false;
 
@@ -871,12 +873,15 @@ setup_tcs_info(isel_context *ctx, nir_shader *nir)
       unreachable("Unsupported TCS shader stage");
    }
 
+   ctx->tcs_num_outputs = util_last_bit64(ctx->args->shader_info->tcs.outputs_written);
+   ctx->tcs_num_patch_outputs = util_last_bit64(ctx->args->shader_info->tcs.patch_outputs_written);
+
    ctx->tcs_num_patches = get_tcs_num_patches(
                              ctx->args->options->key.tcs.input_vertices,
                              nir->info.tess.tcs_vertices_out,
                              ctx->tcs_num_inputs,
-                             ctx->args->shader_info->tcs.outputs_written,
-                             ctx->args->shader_info->tcs.patch_outputs_written,
+                             ctx->tcs_num_outputs,
+                             ctx->tcs_num_patch_outputs,
                              ctx->args->options->tess_offchip_block_dw_size,
                              ctx->args->options->chip_class,
                              ctx->args->options->family);
@@ -885,8 +890,8 @@ setup_tcs_info(isel_context *ctx, nir_shader *nir)
                              nir->info.tess.tcs_vertices_out,
                              ctx->tcs_num_inputs,
                              ctx->tcs_num_patches,
-                             ctx->args->shader_info->tcs.outputs_written,
-                             ctx->args->shader_info->tcs.patch_outputs_written);
+                             ctx->tcs_num_outputs,
+                             ctx->tcs_num_patch_outputs);
 
    ctx->args->shader_info->tcs.num_patches = ctx->tcs_num_patches;
    ctx->args->shader_info->tcs.lds_size = lds_size;
index a40467a..3f214f7 100644 (file)
@@ -4004,13 +4004,15 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
                                ctx.tcs_num_inputs = args->options->key.tcs.num_inputs;
                        else
                                ctx.tcs_num_inputs = util_last_bit64(args->shader_info->vs.ls_outputs_written);
+                       unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written);
+                       unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written);
                        ctx.tcs_num_patches =
                                get_tcs_num_patches(
                                        ctx.args->options->key.tcs.input_vertices,
                                        ctx.shader->info.tess.tcs_vertices_out,
                                        ctx.tcs_num_inputs,
-                                       ctx.args->shader_info->tcs.outputs_written,
-                                       ctx.args->shader_info->tcs.patch_outputs_written,
+                                       tcs_num_outputs,
+                                       tcs_num_patch_outputs,
                                        ctx.args->options->tess_offchip_block_dw_size,
                                        ctx.args->options->chip_class,
                                        ctx.args->options->family);
@@ -4114,6 +4116,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
                }
 
                if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
+                       unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written);
+                       unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written);
                        args->shader_info->tcs.num_patches = ctx.tcs_num_patches;
                        args->shader_info->tcs.lds_size =
                                calculate_tess_lds_size(
@@ -4121,8 +4125,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
                                        ctx.shader->info.tess.tcs_vertices_out,
                                        ctx.tcs_num_inputs,
                                        ctx.tcs_num_patches,
-                                       ctx.args->shader_info->tcs.outputs_written,
-                                       ctx.args->shader_info->tcs.patch_outputs_written);
+                                       tcs_num_outputs,
+                                       tcs_num_patch_outputs);
                }
        }
 
index 165df3a..608900b 100644 (file)
@@ -527,19 +527,16 @@ calculate_tess_lds_size(unsigned tcs_num_input_vertices,
                        unsigned tcs_num_output_vertices,
                        unsigned tcs_num_inputs,
                        unsigned tcs_num_patches,
-                       unsigned tcs_outputs_written,
-                       unsigned tcs_per_patch_outputs_written)
+                       unsigned tcs_num_outputs,
+                       unsigned tcs_num_patch_outputs)
 {
-       unsigned num_tcs_outputs = util_last_bit64(tcs_outputs_written);
-       unsigned num_tcs_patch_outputs = util_last_bit64(tcs_per_patch_outputs_written);
-
        unsigned input_vertex_size = tcs_num_inputs * 16;
-       unsigned output_vertex_size = num_tcs_outputs * 16;
+       unsigned output_vertex_size = tcs_num_outputs * 16;
 
        unsigned input_patch_size = tcs_num_input_vertices * input_vertex_size;
 
        unsigned pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
-       unsigned output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+       unsigned output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
 
        unsigned output_patch0_offset = input_patch_size * tcs_num_patches;
 
@@ -550,19 +547,17 @@ static inline unsigned
 get_tcs_num_patches(unsigned tcs_num_input_vertices,
                        unsigned tcs_num_output_vertices,
                        unsigned tcs_num_inputs,
-                       unsigned tcs_outputs_written,
-                       unsigned tcs_per_patch_outputs_written,
+                       unsigned tcs_num_outputs,
+                       unsigned tcs_num_patch_outputs,
                        unsigned tess_offchip_block_dw_size,
                        enum chip_class chip_class,
                        enum radeon_family family)
 {
        uint32_t input_vertex_size = tcs_num_inputs * 16;
        uint32_t input_patch_size = tcs_num_input_vertices * input_vertex_size;
-       uint32_t num_tcs_outputs = util_last_bit64(tcs_outputs_written);
-       uint32_t num_tcs_patch_outputs = util_last_bit64(tcs_per_patch_outputs_written);
-       uint32_t output_vertex_size = num_tcs_outputs * 16;
+       uint32_t output_vertex_size = tcs_num_outputs * 16;
        uint32_t pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
-       uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+       uint32_t output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
 
        /* Ensure that we only need one wave per SIMD so we don't need to check
         * resource usage. Also ensures that the number of tcs in and out