radeonsi: fix color inputs/outputs for GS and tess
author Marek Olšák <marek.olsak@amd.com>
Wed, 23 May 2018 04:02:10 +0000 (00:02 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 25 May 2018 20:46:00 +0000 (16:46 -0400)
GS is tested, tessellation is untested.

Track outputs_written_before_ps for the HW VS stage and outputs_written for
all other stages. COLOR and BCOLOR alias only for HW VS, where the aliased
index drives the elimination of VS outputs based on PS inputs.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 0d24c3a..6734a16 100644 (file)
@@ -191,7 +191,8 @@ unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned in
  * less than 64, so that a 64-bit bitmask of used inputs or outputs can be
  * calculated.
  */
-unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
+unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index,
+                                      unsigned is_varying)
 {
        switch (semantic_name) {
        case TGSI_SEMANTIC_POSITION:
@@ -220,14 +221,20 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
                return SI_MAX_IO_GENERIC + 6;
        case TGSI_SEMANTIC_PRIMID:
                return SI_MAX_IO_GENERIC + 7;
-       case TGSI_SEMANTIC_COLOR: /* these alias */
-       case TGSI_SEMANTIC_BCOLOR:
+       case TGSI_SEMANTIC_COLOR:
                assert(index < 2);
                return SI_MAX_IO_GENERIC + 8 + index;
+       case TGSI_SEMANTIC_BCOLOR:
+               assert(index < 2);
+               /* If it's a varying, COLOR and BCOLOR alias. */
+               if (is_varying)
+                       return SI_MAX_IO_GENERIC + 8 + index;
+               else
+                       return SI_MAX_IO_GENERIC + 10 + index;
        case TGSI_SEMANTIC_TEXCOORD:
                assert(index < 8);
-               assert(SI_MAX_IO_GENERIC + 10 + index < 64);
-               return SI_MAX_IO_GENERIC + 10 + index;
+               STATIC_ASSERT(SI_MAX_IO_GENERIC + 12 + 8 <= 64);
+               return SI_MAX_IO_GENERIC + 12 + index;
        default:
                assert(!"invalid semantic name");
                return 0;
@@ -860,7 +867,7 @@ static LLVMValueRef get_dw_address_from_generic_indices(struct si_shader_context
                si_shader_io_get_unique_index_patch(name[input_index],
                                                    index[input_index]) :
                si_shader_io_get_unique_index(name[input_index],
-                                             index[input_index]);
+                                             index[input_index], false);
 
        /* Add the base address of the element. */
        return LLVMBuildAdd(ctx->ac.builder, base_addr,
@@ -1015,7 +1022,7 @@ static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices(
 
        param_index_base = is_patch ?
                si_shader_io_get_unique_index_patch(name[param_base], index[param_base]) :
-               si_shader_io_get_unique_index(name[param_base], index[param_base]);
+               si_shader_io_get_unique_index(name[param_base], index[param_base], false);
 
        if (param_index) {
                param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
@@ -1622,7 +1629,7 @@ LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
        unsigned param;
        LLVMValueRef value;
 
-       param = si_shader_io_get_unique_index(semantic_name, semantic_index);
+       param = si_shader_io_get_unique_index(semantic_name, semantic_index, false);
 
        /* GFX9 has the ESGS ring in LDS. */
        if (ctx->screen->info.chip_class >= GFX9) {
@@ -2916,7 +2923,8 @@ static void si_build_param_exports(struct si_shader_context *ctx,
                if ((semantic_name != TGSI_SEMANTIC_GENERIC ||
                     semantic_index < SI_MAX_IO_GENERIC) &&
                    shader->key.opt.kill_outputs &
-                   (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index)))
+                   (1ull << si_shader_io_get_unique_index(semantic_name,
+                                                          semantic_index, true)))
                        continue;
 
                si_export_param(ctx, param_count, outputs[i].values);
@@ -3546,7 +3554,7 @@ static void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi,
                    name == TGSI_SEMANTIC_VIEWPORT_INDEX)
                        continue;
 
-               int param = si_shader_io_get_unique_index(name, index);
+               int param = si_shader_io_get_unique_index(name, index, false);
                LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr,
                                        LLVMConstInt(ctx->i32, param * 4, 0), "");
 
@@ -3595,7 +3603,7 @@ static void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi,
                        continue;
 
                param = si_shader_io_get_unique_index(info->output_semantic_name[i],
-                                                     info->output_semantic_index[i]);
+                                                     info->output_semantic_index[i], false);
 
                for (chan = 0; chan < 4; chan++) {
                        if (!(info->output_usagemask[i] & (1 << chan)))
index 94366f4..555ca59 100644 (file)
@@ -152,7 +152,7 @@ struct si_context;
 /* Shader IO unique indices are supported for TGSI_SEMANTIC_GENERIC with an
  * index smaller than this.
  */
-#define SI_MAX_IO_GENERIC       46
+#define SI_MAX_IO_GENERIC       44
 
 /* SGPR user data indices */
 enum {
@@ -393,6 +393,7 @@ struct si_shader_selector {
        /* CS parameters */
        unsigned local_size;
 
+       uint64_t        outputs_written_before_ps; /* "get_unique_index" bits */
        uint64_t        outputs_written;        /* "get_unique_index" bits */
        uint32_t        patch_outputs_written;  /* "get_unique_index_patch" bits */
 
@@ -668,7 +669,8 @@ int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler,
                     struct pipe_debug_callback *debug);
 void si_shader_destroy(struct si_shader *shader);
 unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
-unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
+unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index,
+                                      unsigned is_varying);
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
 void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
                    struct pipe_debug_callback *debug, unsigned processor,
index 6d558c9..1f44366 100644 (file)
@@ -1223,12 +1223,12 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
        }
 
        /* Find out which VS outputs aren't used by the PS. */
-       uint64_t outputs_written = vs->outputs_written;
+       uint64_t outputs_written = vs->outputs_written_before_ps;
        uint64_t inputs_read = 0;
 
        /* ignore POSITION, PSIZE */
-       outputs_written &= ~((1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_POSITION, 0)) |
-                            (1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_PSIZE, 0)));
+       outputs_written &= ~((1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_POSITION, 0, true)) |
+                            (1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_PSIZE, 0, true)));
 
        if (!ps_disabled) {
                inputs_read = ps->inputs_read;
@@ -1927,8 +1927,8 @@ static void si_init_shader_selector_async(void *job, int thread_index)
                                                break;
                                        /* fall through */
                                default:
-                                       id = si_shader_io_get_unique_index(name, index);
-                                       sel->outputs_written &= ~(1ull << id);
+                                       id = si_shader_io_get_unique_index(name, index, true);
+                                       sel->outputs_written_before_ps &= ~(1ull << id);
                                        break;
                                case TGSI_SEMANTIC_POSITION: /* ignore these */
                                case TGSI_SEMANTIC_PSIZE:
@@ -2101,7 +2101,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                                /* fall through */
                        default:
                                sel->outputs_written |=
-                                       1ull << si_shader_io_get_unique_index(name, index);
+                                       1ull << si_shader_io_get_unique_index(name, index, false);
+                               sel->outputs_written_before_ps |=
+                                       1ull << si_shader_io_get_unique_index(name, index, true);
                                break;
                        case TGSI_SEMANTIC_CLIPVERTEX: /* ignore these */
                        case TGSI_SEMANTIC_EDGEFLAG:
@@ -2115,6 +2117,8 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                 */
                if (sctx->chip_class >= GFX9)
                        sel->esgs_itemsize += 4;
+
+               assert(((sel->esgs_itemsize / 4) & C_028AAC_ITEMSIZE) == 0);
                break;
 
        case PIPE_SHADER_FRAGMENT:
@@ -2130,7 +2134,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                                /* fall through */
                        default:
                                sel->inputs_read |=
-                                       1ull << si_shader_io_get_unique_index(name, index);
+                                       1ull << si_shader_io_get_unique_index(name, index, true);
                                break;
                        case TGSI_SEMANTIC_PCOORD: /* ignore this */
                                break;