radeonsi/gfx9: always compile monolithic ES-GS (asynchronously)

author Marek Olšák <marek.olsak@amd.com>

Tue, 18 Apr 2017 23:53:35 +0000 (01:53 +0200)

committer Marek Olšák <marek.olsak@amd.com>

Fri, 28 Apr 2017 19:47:35 +0000 (21:47 +0200)
author Marek Olšák <marek.olsak@amd.com>
Tue, 18 Apr 2017 23:53:35 +0000 (01:53 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 28 Apr 2017 19:47:35 +0000 (21:47 +0200)
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h

index 09ddf43..a508ece 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -445,12 +445,20 @@ struct si_shader_key {
         } mono;
  
         /* Optimization flags for asynchronous compilation only. */
-       union {
+       struct {
                 struct {
                         uint64_t        kill_outputs; /* "get_unique_index" bits */
                         uint32_t        kill_outputs2; /* "get_unique_index2" bits */
                         unsigned        clip_disable:1;
                 } hw_vs; /* HW VS (it can be VS, TES, GS) */
+
+               /* For shaders where monolithic variants have better code.
+                *
+                * This is a flag that has no effect on code generation,
+                * but forces monolithic shaders to be used as soon as
+                * possible, because it's in the "opt" group.
+                */
+               unsigned        prefer_mono:1;
         } opt;
  };
  
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c

index b7f848f..b2cdcb7 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1279,6 +1279,25 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
                                                           key, &key->part.gs.vs_prolog);
                                 key->part.gs.es = sctx->vs_shader.cso;
                         }
+
+                       /* Merged ES-GS can have unbalanced wave usage.
+                        *
+                        * ES threads are per-vertex, while GS threads are
+                        * per-primitive. So without any amplification, there
+                        * are fewer GS threads than ES threads, which can result
+                        * in empty (no-op) GS waves. With too much amplification,
+                        * there are more GS threads than ES threads, which
+                        * can result in empty (no-op) ES waves.
+                        *
+                        * Non-monolithic shaders are implemented by setting EXEC
+                        * at the beginning of shader parts, and they don't jump
+                        * to the end if EXEC is 0.
+                        *
+                        * Monolithic shaders use conditional blocks, so they can
+                        * jump and skip empty waves of ES or GS. So set this to
+                        * always use optimized variants, which are monolithic.
+                        */
+                       key->opt.prefer_mono = 1;
                 }
                 key->part.gs.prolog.tri_strip_adj_fix = sctx->gs_tri_strip_adj_fix;
                 break;
author	Marek Olšák <marek.olsak@amd.com>
	Tue, 18 Apr 2017 23:53:35 +0000 (01:53 +0200)
committer	Marek Olšák <marek.olsak@amd.com>
	Fri, 28 Apr 2017 19:47:35 +0000 (21:47 +0200)
src/gallium/drivers/radeonsi/si_shader.h		patch | blob | history
src/gallium/drivers/radeonsi/si_state_shaders.c		patch | blob | history