radeonsi/gfx10: add as_ngg variant for TES as ES to select Wave32/64
author Marek Olšák <marek.olsak@amd.com>
Fri, 12 Jul 2019 23:49:30 +0000 (19:49 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Sat, 20 Jul 2019 00:16:19 +0000 (20:16 -0400)
Legacy GS has to use Wave64, so TES before GS has to use Wave64 too.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 47cfac4..63e1bda 100644 (file)
@@ -1895,13 +1895,14 @@ static inline bool si_compute_prim_discard_enabled(struct si_context *sctx)
 
 static inline unsigned si_get_wave_size(struct si_screen *sscreen,
                                        enum pipe_shader_type shader_type,
-                                       bool ngg)
+                                       bool ngg, bool es)
 {
        if (shader_type == PIPE_SHADER_COMPUTE)
                return sscreen->compute_wave_size;
        else if (shader_type == PIPE_SHADER_FRAGMENT)
                return sscreen->ps_wave_size;
-       else if (shader_type == PIPE_SHADER_GEOMETRY && !ngg) /* legacy GS only supports Wave64 */
+       else if ((shader_type == PIPE_SHADER_TESS_EVAL && es && !ngg) ||
+                (shader_type == PIPE_SHADER_GEOMETRY && !ngg)) /* legacy GS only supports Wave64 */
                return 64;
        else
                return sscreen->ge_wave_size;
@@ -1910,7 +1911,7 @@ static inline unsigned si_get_wave_size(struct si_screen *sscreen,
 static inline unsigned si_get_shader_wave_size(struct si_shader *shader)
 {
        return si_get_wave_size(shader->selector->screen, shader->selector->type,
-                               shader->key.as_ngg);
+                               shader->key.as_ngg, shader->key.as_es);
 }
 
 #define PRINT_ERR(fmt, args...) \
index 4bdaa7f..6fd7a56 100644 (file)
@@ -5727,7 +5727,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
        shader->is_gs_copy_shader = true;
 
        si_init_shader_ctx(&ctx, sscreen, compiler,
-                          si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false));
+                          si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false));
        ctx.shader = shader;
        ctx.type = PIPE_SHADER_VERTEX;
 
@@ -6172,7 +6172,8 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx)
                }
        }
 
-       if (shader->key.as_ngg && ctx->type != PIPE_SHADER_GEOMETRY) {
+       if (ctx->type != PIPE_SHADER_GEOMETRY &&
+           (shader->key.as_ngg && !shader->key.as_es)) {
                /* Unconditionally declare scratch space base for streamout and
                 * vertex compaction. Whether space is actually allocated is
                 * determined during linking / PM4 creation.
@@ -6219,13 +6220,13 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx)
                                                ctx->param_merged_wave_info, 0);
                } else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
                           ctx->type == PIPE_SHADER_GEOMETRY ||
-                          shader->key.as_ngg) {
+                          (shader->key.as_ngg && !shader->key.as_es)) {
                        LLVMValueRef num_threads;
                        bool nested_barrier;
 
                        if (!shader->is_monolithic ||
                            (ctx->type == PIPE_SHADER_TESS_EVAL &&
-                            shader->key.as_ngg))
+                            (shader->key.as_ngg && !shader->key.as_es)))
                                ac_init_exec_full_mask(&ctx->ac);
 
                        if (ctx->type == PIPE_SHADER_TESS_CTRL ||
@@ -7048,6 +7049,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                        struct si_shader shader_es = {};
                        shader_es.selector = es;
                        shader_es.key.as_es = 1;
+                       shader_es.key.as_ngg = shader->key.as_ngg;
                        shader_es.key.mono = shader->key.mono;
                        shader_es.key.opt = shader->key.opt;
                        shader_es.is_monolithic = true;
@@ -7305,7 +7307,8 @@ si_get_shader_part(struct si_screen *sscreen,
 
        struct si_shader_context ctx;
        si_init_shader_ctx(&ctx, sscreen, compiler,
-                          si_get_wave_size(sscreen, type, shader.key.as_ngg));
+                          si_get_wave_size(sscreen, type, shader.key.as_ngg,
+                                           shader.key.as_es));
        ctx.shader = &shader;
        ctx.type = type;
 
@@ -7703,10 +7706,15 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen,
                                      struct pipe_debug_callback *debug)
 {
        if (sscreen->info.chip_class >= GFX9) {
-               struct si_shader *es_main_part =
-                       shader->key.part.gs.es->main_shader_part_es;
+               struct si_shader *es_main_part;
+               enum pipe_shader_type es_type = shader->key.part.gs.es->type;
 
-               if (shader->key.part.gs.es->type == PIPE_SHADER_VERTEX &&
+               if (es_type == PIPE_SHADER_TESS_EVAL && shader->key.as_ngg)
+                       es_main_part = shader->key.part.gs.es->main_shader_part_ngg_es;
+               else
+                       es_main_part = shader->key.part.gs.es->main_shader_part_es;
+
+               if (es_type == PIPE_SHADER_VERTEX &&
                    !si_get_vs_prolog(sscreen, compiler, shader, debug, es_main_part,
                                      &shader->key.part.gs.vs_prolog))
                        return false;
index 1b4f29b..803045d 100644 (file)
@@ -337,6 +337,7 @@ struct si_shader_selector {
        struct si_shader        *main_shader_part_ls; /* as_ls is set in the key */
        struct si_shader        *main_shader_part_es; /* as_es is set in the key */
        struct si_shader        *main_shader_part_ngg; /* as_ngg is set in the key */
+       struct si_shader        *main_shader_part_ngg_es; /* as_es and as_ngg are set in the key (TES before NGG GS) */
 
        struct si_shader        *gs_copy_shader;
 
@@ -789,6 +790,8 @@ si_get_main_shader_part(struct si_shader_selector *sel,
 {
        if (key->as_ls)
                return &sel->main_shader_part_ls;
+       if (key->as_es && key->as_ngg)
+               return &sel->main_shader_part_ngg_es;
        if (key->as_es)
                return &sel->main_shader_part_es;
        if (key->as_ngg)
index 48b8b73..c5e9230 100644 (file)
@@ -1852,10 +1852,11 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
                        key->mono.u.ff_tcs_inputs_to_copy = sctx->vs_shader.cso->outputs_written;
                break;
        case PIPE_SHADER_TESS_EVAL:
+               key->as_ngg = stages_key.u.ngg;
+
                if (sctx->gs_shader.cso)
                        key->as_es = 1;
                else {
-                       key->as_ngg = stages_key.u.ngg;
                        si_shader_selector_key_hw_vs(sctx, sel, key);
 
                        if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
@@ -2269,6 +2270,10 @@ current_not_ready:
                        else
                                assert(0);
 
+                       if (sel->type == PIPE_SHADER_GEOMETRY &&
+                           previous_stage_sel->type == PIPE_SHADER_TESS_EVAL)
+                               shader1_key.as_ngg = key->as_ngg;
+
                        mtx_lock(&previous_stage_sel->mutex);
                        ok = si_check_missing_main_part(sscreen,
                                                        previous_stage_sel,
@@ -2429,7 +2434,7 @@ static void si_init_shader_selector_async(void *job, int thread_index)
        if (sel->nir) {
                /* TODO: GS always sets wave size = default. Legacy GS will have
                 * incorrect subgroup_size and ballot_bit_size. */
-               si_lower_nir(sel, si_get_wave_size(sscreen, sel->type, true));
+               si_lower_nir(sel, si_get_wave_size(sscreen, sel->type, true, false));
        }
 
        /* Compile the main shader part for use with a prolog and/or epilog.
@@ -2455,9 +2460,9 @@ static void si_init_shader_selector_async(void *job, int thread_index)
                                              sel->so.num_outputs != 0,
                                              &shader->key);
                if (sscreen->info.chip_class >= GFX10 &&
-                   (((sel->type == PIPE_SHADER_VERTEX ||
-                      sel->type == PIPE_SHADER_TESS_EVAL) &&
+                   ((sel->type == PIPE_SHADER_VERTEX &&
                      !shader->key.as_ls && !shader->key.as_es) ||
+                    sel->type == PIPE_SHADER_TESS_EVAL ||
                     sel->type == PIPE_SHADER_GEOMETRY))
                        shader->key.as_ngg = 1;