radv,aco: add radv_nir_compiler_options::wgp_mode
authorRhys Perry <pendingchaos02@gmail.com>
Mon, 1 Feb 2021 15:14:01 +0000 (15:14 +0000)
committerRhys Perry <pendingchaos02@gmail.com>
Mon, 15 Feb 2021 13:35:36 +0000 (13:35 +0000)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8761>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_instruction_selection_setup.cpp
src/amd/compiler/aco_ir.cpp
src/amd/compiler/aco_ir.h
src/amd/compiler/tests/helpers.cpp
src/amd/vulkan/radv_shader.c
src/amd/vulkan/radv_shader.h

index efd7be9..44fa069 100644 (file)
@@ -12246,7 +12246,7 @@ void select_trap_handler_shader(Program *program, struct nir_shader *shader,
    assert(args->options->chip_class == GFX8);
 
    init_program(program, compute_cs, args->shader_info,
-                args->options->chip_class, args->options->family, config);
+                args->options->chip_class, args->options->family, args->options->wgp_mode, config);
 
    isel_context ctx = {};
    ctx.program = program;
index 242966b..26466ee 100644 (file)
@@ -1123,7 +1123,7 @@ setup_isel_context(Program* program,
       unreachable("Shader stage not implemented");
 
    init_program(program, Stage { hw_stage, sw_stage }, args->shader_info,
-                args->options->chip_class, args->options->family, config);
+                args->options->chip_class, args->options->family, args->options->wgp_mode, config);
 
    isel_context ctx = {};
    ctx.program = program;
index 8aeca80..7fea714 100644 (file)
@@ -60,7 +60,7 @@ void init()
 
 void init_program(Program *program, Stage stage, struct radv_shader_info *info,
                   enum chip_class chip_class, enum radeon_family family,
-                  ac_shader_config *config)
+                  bool wgp_mode, ac_shader_config *config)
 {
    program->stage = stage;
    program->config = config;
@@ -123,7 +123,7 @@ void init_program(Program *program, Stage stage, struct radv_shader_info *info,
       program->sgpr_limit = 104;
    }
 
-   program->wgp_mode = chip_class >= GFX10; /* assume WGP is used on Navi */
+   program->wgp_mode = wgp_mode;
 
    program->next_fp_mode.preserve_signed_zero_inf_nan32 = false;
    program->next_fp_mode.preserve_signed_zero_inf_nan16_64 = false;
index 056530c..75af429 100644 (file)
@@ -1904,7 +1904,7 @@ void init();
 
 void init_program(Program *program, Stage stage, struct radv_shader_info *info,
                   enum chip_class chip_class, enum radeon_family family,
-                  ac_shader_config *config);
+                  bool wgp_mode, ac_shader_config *config);
 
 void select_program(Program *program,
                     unsigned shader_count,
index d4224b3..77038ff 100644 (file)
@@ -78,7 +78,7 @@ void create_program(enum chip_class chip_class, Stage stage, unsigned wave_size,
    info.wave_size = wave_size;
 
    program.reset(new Program);
-   aco::init_program(program.get(), stage, &info, chip_class, family, &config);
+   aco::init_program(program.get(), stage, &info, chip_class, family, false, &config);
    program->workgroup_size = UINT_MAX;
    calc_min_waves(program.get());
 
index b6f3073..bbda73a 100644 (file)
@@ -894,6 +894,24 @@ radv_get_shader_binary_size(size_t code_size)
        return code_size + DEBUGGER_NUM_MARKERS * 4;
 }
 
+static bool radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage,
+                                    const struct radv_shader_info *info)
+{
+       enum chip_class chip = device->physical_device->rad_info.chip_class;
+       switch (stage) {
+       case MESA_SHADER_COMPUTE:
+       case MESA_SHADER_TESS_CTRL:
+               return chip >= GFX10;
+       case MESA_SHADER_GEOMETRY:
+               return chip == GFX10 || (chip >= GFX10_3 && !info->is_ngg);
+       case MESA_SHADER_VERTEX:
+       case MESA_SHADER_TESS_EVAL:
+               return chip == GFX10 && info->is_ngg;
+       default:
+               return false;
+       }
+}
+
 static void radv_postprocess_config(const struct radv_device *device,
                                    const struct ac_shader_config *config_in,
                                    const struct radv_shader_info *info,
@@ -956,6 +974,8 @@ static void radv_postprocess_config(const struct radv_device *device,
                config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX9(info->num_user_sgprs >> 5);
        }
 
+       bool wgp_mode = radv_should_use_wgp_mode(device, stage, info);
+
        switch (stage) {
        case MESA_SHADER_TESS_EVAL:
                if (info->is_ngg) {
@@ -998,7 +1018,7 @@ static void radv_postprocess_config(const struct radv_device *device,
                                             S_00B12C_EXCP_EN(excp_en);
                }
                config_out->rsrc1 |= S_00B428_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) |
-                                    S_00B428_WGP_MODE(pdevice->rad_info.chip_class >= GFX10);
+                                    S_00B428_WGP_MODE(wgp_mode);
                config_out->rsrc2 |= S_00B42C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
                break;
        case MESA_SHADER_VERTEX:
@@ -1048,7 +1068,7 @@ static void radv_postprocess_config(const struct radv_device *device,
                break;
        case MESA_SHADER_COMPUTE:
                config_out->rsrc1 |= S_00B848_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) |
-                                    S_00B848_WGP_MODE(pdevice->rad_info.chip_class >= GFX10);
+                                    S_00B848_WGP_MODE(wgp_mode);
                config_out->rsrc2 |=
                        S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) |
                        S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) |
@@ -1099,7 +1119,7 @@ static void radv_postprocess_config(const struct radv_device *device,
                 * disable exactly 1 CU per SA for GS.
                 */
                config_out->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) |
-                                    S_00B228_WGP_MODE(pdevice->rad_info.chip_class == GFX10);
+                                    S_00B228_WGP_MODE(wgp_mode);
                config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
                                     S_00B22C_LDS_SIZE(config_in->lds_size) |
                                     S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL);
@@ -1135,7 +1155,7 @@ static void radv_postprocess_config(const struct radv_device *device,
                }
 
                config_out->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) |
-                                    S_00B228_WGP_MODE(pdevice->rad_info.chip_class >= GFX10);
+                                    S_00B228_WGP_MODE(wgp_mode);
                config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
                                         S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL);
        } else if (pdevice->rad_info.chip_class >= GFX9 &&
@@ -1435,6 +1455,7 @@ radv_shader_variant_compile(struct radv_device *device,
        options.robust_buffer_access = device->robust_buffer_access;
        options.robust_buffer_access2 = device->robust_buffer_access2;
        options.disable_optimizations = disable_optimizations;
+       options.wgp_mode = radv_should_use_wgp_mode(device, stage, info);
 
        return shader_variant_compile(device, module, shaders, shader_count, stage, info,
                                      &options, false, false,
@@ -1472,6 +1493,7 @@ radv_create_trap_handler_shader(struct radv_device *device)
        nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_trap_handler");
 
        options.explicit_scratch_args = true;
+       options.wgp_mode = radv_should_use_wgp_mode(device, MESA_SHADER_COMPUTE, &info);
        info.wave_size = 64;
 
        shader = shader_variant_compile(device, NULL, &b.shader, 1,
index 5e25261..8b5a2fe 100644 (file)
@@ -146,6 +146,7 @@ struct radv_nir_compiler_options {
        bool use_ngg_streamout;
        bool enable_mrt_output_nan_fixup;
        bool disable_optimizations; /* only used by ACO */
+       bool wgp_mode;
        enum radeon_family family;
        enum chip_class chip_class;
        uint32_t tess_offchip_block_dw_size;